This RMD file contains all coursework questions. Disclaimer: Some graphs might not be well-scaled (especially grid plots)
setwd("~/Desktop/data_files/dataverse_files")
# install.packages("ggplot2")
# install.packages("tidyr")
# install.packages("ragg")
# install.packages("cowplot")
# install.packages("dplyr")
# install.packages("reshape2")
# install.packages("scales")
# install.packages("lubridate")
# install.packages("future")
# install.packages("mlr3")
# install.packages("mlr3verse")
# install.packages("ranger")
library(ggplot2)
library(tidyr)
library(ragg)
library(cowplot) # For grid plots
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape2) # To melt and cast dataframes
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(scales) # For axis labels
library(lubridate) # To convert time
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:cowplot':
##
## stamp
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Libraries for machine learning
library(future)
library(mlr3verse)
## Loading required package: mlr3
library(mlr3learners)
library(mlr3pipelines)
library(mlr3tuning)
## Loading required package: paradox
library(mlr3viz)
library(paradox)
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 4.1-3
library(ranger)
future::plan()
## sequential:
## - args: function (..., envir = parent.frame())
## - tweaked: FALSE
## - call: NULL
Loading data files – Years 2005 to 2007 – and binding them into data frame years
Loading supplementary datasets – planes and airports
# Load the supplementary lookup tables and the three yearly flight files.
# Every input lives in the same directory, so the location is defined once
# and each path is built from it with file.path().
data_dir <- "/Users/celestlee/Desktop/data_files/dataverse_files"
planes <- read.csv(file.path(data_dir, "plane-data.csv"), header = TRUE)
airports <- read.csv(file.path(data_dir, "airports.csv"), header = TRUE)
# Might take some time to load!
# The yearly files are read in order and stacked row-wise into `years`.
year_files <- file.path(data_dir, c("2005.csv.bz2", "2006.csv.bz2", "2007.csv.bz2"))
years <- do.call(rbind, lapply(year_files, read.csv))
Summary of data frame years
str(years)
## 'data.frame': 21735733 obs. of 29 variables:
## $ Year : int 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 ...
## $ Month : int 1 1 1 1 1 1 1 1 1 1 ...
## $ DayofMonth : int 28 29 30 31 2 3 4 5 6 7 ...
## $ DayOfWeek : int 5 6 7 1 7 1 2 3 4 5 ...
## $ DepTime : int 1603 1559 1603 1556 1934 2042 2046 NA 2110 1859 ...
## $ CRSDepTime : int 1605 1605 1610 1605 1900 1900 1900 1900 1900 1900 ...
## $ ArrTime : int 1741 1736 1741 1726 2235 9 2357 NA 8 2235 ...
## $ CRSArrTime : int 1759 1759 1805 1759 2232 2232 2232 2232 2223 2223 ...
## $ UniqueCarrier : chr "UA" "UA" "UA" "UA" ...
## $ FlightNum : int 541 541 541 541 542 542 542 542 542 542 ...
## $ TailNum : chr "N935UA" "N941UA" "N342UA" "N326UA" ...
## $ ActualElapsedTime: int 158 157 158 150 121 147 131 NA 118 156 ...
## $ CRSElapsedTime : int 174 174 175 174 152 152 152 152 143 143 ...
## $ AirTime : int 131 136 131 129 106 97 100 NA 101 96 ...
## $ ArrDelay : int -18 -23 -24 -33 3 97 85 NA 105 12 ...
## $ DepDelay : int -2 -6 -7 -9 34 102 106 NA 130 -1 ...
## $ Origin : chr "BOS" "BOS" "BOS" "BOS" ...
## $ Dest : chr "ORD" "ORD" "ORD" "ORD" ...
## $ Distance : int 867 867 867 867 867 867 867 867 867 867 ...
## $ TaxiIn : int 4 6 9 11 5 3 5 0 2 4 ...
## $ TaxiOut : int 23 15 18 10 10 47 26 0 15 56 ...
## $ Cancelled : int 0 0 0 0 0 0 0 1 0 0 ...
## $ CancellationCode : chr "" "" "" "" ...
## $ Diverted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CarrierDelay : int 0 0 0 0 0 23 46 0 16 0 ...
## $ WeatherDelay : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NASDelay : int 0 0 0 0 0 0 0 0 0 0 ...
## $ SecurityDelay : int 0 0 0 0 0 0 0 0 0 0 ...
## $ LateAircraftDelay: int 0 0 0 0 0 74 39 0 89 0 ...
summary(years)
## Year Month DayofMonth DayOfWeek
## Min. :2005 Min. : 1.00 Min. : 1.00 Min. :1.000
## 1st Qu.:2005 1st Qu.: 4.00 1st Qu.: 8.00 1st Qu.:2.000
## Median :2006 Median : 7.00 Median :16.00 Median :4.000
## Mean :2006 Mean : 6.52 Mean :15.73 Mean :3.943
## 3rd Qu.:2007 3rd Qu.: 9.00 3rd Qu.:23.00 3rd Qu.:6.000
## Max. :2007 Max. :12.00 Max. :31.00 Max. :7.000
##
## DepTime CRSDepTime ArrTime CRSArrTime
## Min. : 1 Min. : 0 Min. : 1 Min. : 0
## 1st Qu.: 931 1st Qu.: 930 1st Qu.:1110 1st Qu.:1116
## Median :1330 Median :1325 Median :1516 Median :1520
## Mean :1341 Mean :1334 Mean :1487 Mean :1497
## 3rd Qu.:1733 3rd Qu.:1721 3rd Qu.:1912 3rd Qu.:1909
## Max. :2930 Max. :2359 Max. :2955 Max. :2400
## NA's :416412 NA's :463805
## UniqueCarrier FlightNum TailNum ActualElapsedTime
## Length:21735733 Min. : 1 Length:21735733 Min. : -66.0
## Class :character 1st Qu.: 586 Class :character 1st Qu.: 75.0
## Mode :character Median :1481 Mode :character Median : 107.0
## Mean :2140 Mean : 125.7
## 3rd Qu.:3364 3rd Qu.: 156.0
## Max. :9619 Max. :1879.0
## NA's :463805
## CRSElapsedTime AirTime ArrDelay DepDelay
## Min. :-1240.0 Min. :-1428.0 Min. :-939.0 Min. :-1200.0
## 1st Qu.: 76.0 1st Qu.: 54.0 1st Qu.: -9.0 1st Qu.: -4.0
## Median : 108.0 Median : 84.0 Median : -1.0 Median : 0.0
## Mean : 126.8 Mean : 102.3 Mean : 8.7 Mean : 10.1
## 3rd Qu.: 156.0 3rd Qu.: 131.0 3rd Qu.: 13.0 3rd Qu.: 9.0
## Max. : 1430.0 Max. : 1958.0 Max. :2598.0 Max. : 2601.0
## NA's :998 NA's :463805 NA's :463805 NA's :416412
## Origin Dest Distance TaxiIn
## Length:21735733 Length:21735733 Min. : 11.0 Min. : 0.000
## Class :character Class :character 1st Qu.: 317.0 1st Qu.: 4.000
## Mode :character Mode :character Median : 569.0 Median : 5.000
## Mean : 723.8 Mean : 7.101
## 3rd Qu.: 950.0 3rd Qu.: 8.000
## Max. :4962.0 Max. :1523.000
##
## TaxiOut Cancelled CancellationCode Diverted
## Min. : 0.00 Min. :0.00000 Length:21735733 Min. :0.00000
## 1st Qu.: 10.00 1st Qu.:0.00000 Class :character 1st Qu.:0.00000
## Median : 13.00 Median :0.00000 Mode :character Median :0.00000
## Mean : 15.83 Mean :0.01916 Mean :0.00218
## 3rd Qu.: 19.00 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1339.00 Max. :1.00000 Max. :1.00000
##
## CarrierDelay WeatherDelay NASDelay SecurityDelay
## Min. : 0.000 Min. : 0.0000 Min. : -49.000 Min. : 0.0000
## 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 0.0000
## Median : 0.000 Median : 0.0000 Median : 0.000 Median : 0.0000
## Mean : 3.428 Mean : 0.7042 Mean : 3.582 Mean : 0.0248
## 3rd Qu.: 0.000 3rd Qu.: 0.0000 3rd Qu.: 0.000 3rd Qu.: 0.0000
## Max. :2580.000 Max. :1510.0000 Max. :1392.000 Max. :382.0000
##
## LateAircraftDelay
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 0.000
## Mean : 4.436
## 3rd Qu.: 0.000
## Max. :1366.000
##
years data frame and creating variable status
Setting Month, Year, DayOfWeek and DayofMonth as factors in data frame years
# Treat the calendar fields as categorical values rather than numbers.
calendar_cols <- c("Month", "Year", "DayOfWeek", "DayofMonth")
years[calendar_cols] <- lapply(years[calendar_cols], as.factor)
Adding new column status to years for
flight status
# Derive one flight status per row. Conditions are tried top to bottom, so a
# cancelled flight is always "Cancelled", and a diverted flight that also left
# late is "Delayed". Rows matching no condition stay NA.
years$status <- as.factor(case_when(
  years$Cancelled == 1 ~ "Cancelled",
  years$DepDelay > 0 ~ "Delayed",
  years$Diverted == 1 ~ "Diverted",
  years$Diverted != 1 & years$DepDelay <= 0 & years$Cancelled != 1 ~ "On Time"
))
Summary of column status in data frame
years
summary(years$status)
## Cancelled Delayed Diverted On Time
## 416412 8508330 21996 12788995
# Status of flights in percentage
# Creating data frame `status_perc` with `perc` as percentages of `status`
status_perc <- years %>%
  count(status) %>%
  mutate(perc = n / nrow(years) * 100)
# Bar plot
# The y-axis labels are produced by a function of the break values, so they
# always match whatever breaks ggplot chooses (a fixed label vector errors as
# soon as the number of breaks differs from the number of labels).
status_perc %>%
  ggplot(aes(x = status, y = perc)) +
  geom_col(fill = "steelblue") +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  geom_text(
    aes(label = paste0(round(perc, 2), "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.4
  ) +
  labs(
    title = "Percentage of flight status between 2005 to 2007",
    x = "Flight Status",
    y = "Percentage"
  ) +
  theme_bw() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12)) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# Creating plot labels for `Month` and `DayOfWeek`
month_label <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
week_label <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")

# Count flights and "Delayed" flights within each level of `group_var` and
# return one row per level with `num_delays`, `num_flights` and `delay_rate`.
delay_rate_by <- function(data, group_var) {
  data %>%
    group_by({{ group_var }}) %>%
    summarize(
      num_delays = sum(status == "Delayed"),
      num_flights = n(),
      delay_rate = num_delays / num_flights
    )
}

# Line + point chart of `delay_rate` against `x_var`.
# `x_labels` optionally replaces the factor levels shown on the x axis; the
# default `waiver()` keeps ggplot's own labels.
plot_delay_rate <- function(data, x_var, title, x_labels = waiver()) {
  ggplot(data, aes(x = {{ x_var }}, y = delay_rate, group = 1)) +
    geom_line(color = "steelblue") +
    geom_point(color = "steelblue") +
    scale_x_discrete(labels = x_labels) +
    labs(title = title, y = "Delay Rate") +
    theme_classic() +
    theme(
      plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12),
      axis.title.x = element_blank()
    )
}

# Delay rate by month
month_delay <- delay_rate_by(years, Month)
plot_month <- plot_delay_rate(month_delay, Month, "Month", month_label)
plot_month

# Delay rate by day of month
day_delay <- delay_rate_by(years, DayofMonth)
plot_day <- plot_delay_rate(day_delay, DayofMonth, "Day of Month")
plot_day

# Delay rate by day of week
week_delay <- delay_rate_by(years, DayOfWeek)
plot_week <- plot_delay_rate(week_delay, DayOfWeek, "Day of Week", week_label)
plot_week
# Creating data frame `all_hours` to categorise all `DepTime` as "Normal" or "Unusual".
# A departure time is "Unusual" when it has only one or two digits or when it
# exceeds 2400; rows with a missing `DepTime` are dropped first.
all_hours <- years %>%
  select(DepTime, status) %>%
  drop_na(DepTime) %>%
  mutate(dep_hour = ifelse(
    nchar(DepTime) <= 2 | DepTime > 2400,
    "Unusual", "Normal"
  ))
# Calculating % of "Unusual" DepTime
perc_hour <- all_hours %>%
  count(dep_hour) %>%
  mutate(hour_perc = n / nrow(all_hours) * 100)
# Bar plot of % of Normal vs Unusual timings
# Axis labels are derived from the break values so they cannot fall out of
# step with the breaks ggplot picks.
perc_hour %>%
  ggplot(aes(x = dep_hour, y = hour_perc)) +
  geom_col(fill = "steelblue") +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  geom_text(
    aes(label = paste0(round(hour_perc, 2), "%")),
    position = position_dodge(width = 0.9),
    vjust = -0.4
  ) +
  labs(
    title = "Percentage of Normal vs Unusual Departure Timings",
    x = "",
    y = "Percentage"
  ) +
  theme_bw() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12)) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
Since only 0.33% of the departure times from variable
DepTime are categorised as “Unusual”, omitting this data
will neither skew nor affect the result.
# Logical Data cleansing
# Buckets every `DepTime` into an hour label ("0100" ... "2400") in three
# passes, split by how many digits the value has, then stacks the passes.
#
# Pass 1: three-digit times. signif(x, 1) keeps one significant digit, so the
# value is ROUNDED to a multiple of 100; anything that rounds up to 1000 falls
# through to the final `TRUE ~ "1000"` branch.
# NOTE(review): because signif() rounds rather than truncates, a time near the
# end of an hour can land in the next hour's bucket - confirm this is intended.
time_delay <- years %>%
select(DepTime, status) %>%
filter(nchar(DepTime) == 3) %>%
mutate(DepTime = signif(DepTime, 1)) %>%
mutate(DepTime = case_when(
DepTime == 100 ~ "0100",
DepTime == 200 ~ "0200",
DepTime == 300 ~ "0300",
DepTime == 400 ~ "0400",
DepTime == 500 ~ "0500",
DepTime == 600 ~ "0600",
DepTime == 700 ~ "0700",
DepTime == 800 ~ "0800",
DepTime == 900 ~ "0900",
TRUE ~ "1000"))
time_delay$DepTime <- factor(time_delay$DepTime)
# Pass 2: four-digit times, rounded to two significant digits. Values that
# are still above 2400 after rounding are wrapped by subtracting 2400 and
# prefixing "0"; everything else is kept as its character form.
time_delay2 <- years %>%
select(DepTime, status) %>%
filter(nchar(DepTime) == 4) %>%
mutate(DepTime = signif(DepTime, 2)) %>%
mutate(DepTime = case_when(
DepTime > 2400 ~ paste0("0",as.character(DepTime - 2400)),
TRUE ~ as.character(DepTime)))
time_delay2$DepTime <- factor(time_delay2$DepTime)
# Pass 3: one- and two-digit times are all assigned to the "2400" bucket.
# The final TRUE branch cannot be reached because the filter only admits
# values with one or two digits.
time_delay3 <- years %>%
select(DepTime, status) %>%
filter(nchar(DepTime) == 1 |
nchar(DepTime) == 2) %>%
mutate(DepTime = case_when(
nchar(DepTime) == 1 ~ "2400",
nchar(DepTime) == 2 ~ "2400",
TRUE ~ as.character(DepTime)))
time_delay3$DepTime <- factor(time_delay3$DepTime)
# Stack the three passes back into `time_delay` (one row per flight, with
# `DepTime` now an hour-bucket factor) and print the number of flights per bucket.
# NOTE(review): an earlier comment here referred to `normal_hours` / `new_hour`,
# which are not created anywhere in this block.
time_delay <- rbind(time_delay, time_delay2, time_delay3)
summary(time_delay$DepTime)
## 0100 0200 0300 0400 0500 0600 0700 0800 0900 1000
## 19762 7746 1996 2116 124711 1314650 1368275 1445383 1350730 1357816
## 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000
## 1332094 1357413 1322576 1304779 1280013 1323622 1363610 1321358 1224149 984434
## 2100 2200 2300 2400
## 801662 418428 188940 103058
# Collapse `time_delay` to one row per hour bucket with its delay rate
# (the per-flight data frame is overwritten by this summary).
time_delay <- summarize(
  group_by(time_delay, DepTime),
  num_delays = sum(status == "Delayed"),
  num_flights = n(),
  delay_rate = num_delays / num_flights
)
# Line plot of **delay rate by hour**; hour labels are rotated to fit.
plot_hour <- ggplot(time_delay, aes(x = DepTime, y = delay_rate, group = 1)) +
  geom_line(color = "steelblue") +
  geom_point(color = "steelblue") +
  labs(title = "Hour", y = "Delay Rate") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12),
    axis.title.x = element_blank()
  )
plot_hour
# Grid plot of the four delay-rate charts using the `cowplot` package.
# The 2x2 grid is stored in `p` and then stacked under a shared title strip.
p <- plot_grid(plot_month, plot_day, plot_week, plot_hour, scale = 1)
title <- ggdraw() + draw_label("Delay Rates Based On", fontface = "bold", size = 17)
plot_grid(title, p, ncol = 1, rel_heights = c(0.1, 1))
# Removing data frames from Question 1
rm(day_delay, month_delay, time_delay, week_delay)
# Creating data frame `carrier`: the delay columns of `years`, with `TailNum`
# renamed to `tailnum` so it matches the key column of `planes`.
carrier <- years %>%
  select(
    UniqueCarrier, tailnum = TailNum, DepDelay, ArrDelay, Year,
    CarrierDelay, WeatherDelay, NASDelay, SecurityDelay, LateAircraftDelay
  )
# Removing NA values from `planes` dataset and naming it `plane_year`.
# Non-numeric year entries become NA during the conversion (hence the warning)
# and are then dropped.
planes$year <- as.numeric(planes$year)
## Warning: NAs introduced by coercion
planes <- subset(planes, !is.na(year))
summary(planes$year)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 1992 1999 1996 2002 2008
# A manufacture year of 0 is excluded as well; the remaining year column is
# renamed `planeyear` so it cannot be confused with the flight `Year`.
plane_year <- planes %>%
  filter(year != 0) %>%
  select(tailnum, planeyear = year)
# Joining `plane_year` and `carrier` using inner join on the explicit key
carrier <- carrier %>%
  inner_join(plane_year, by = "tailnum")
# `Year` is a factor at this point; go through character to recover the number
carrier$Year <- as.integer(as.character(carrier$Year))
# Summary of the joined data frame `carrier`
summary(carrier)
## UniqueCarrier tailnum DepDelay ArrDelay
## Length:16633761 Length:16633761 Min. :-1200.00 Min. :-692.00
## Class :character Class :character 1st Qu.: -4.00 1st Qu.: -9.00
## Mode :character Mode :character Median : 0.00 Median : -1.00
## Mean : 10.16 Mean : 9.05
## 3rd Qu.: 9.00 3rd Qu.: 13.00
## Max. : 2601.00 Max. :2598.00
## NA's :109765 NA's :147012
## Year CarrierDelay WeatherDelay NASDelay
## Min. :2005 Min. : 0.000 Min. : 0.0000 Min. : -49.000
## 1st Qu.:2005 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.: 0.000
## Median :2006 Median : 0.000 Median : 0.0000 Median : 0.000
## Mean :2006 Mean : 3.558 Mean : 0.7186 Mean : 3.806
## 3rd Qu.:2007 3rd Qu.: 0.000 3rd Qu.: 0.0000 3rd Qu.: 0.000
## Max. :2007 Max. :2580.000 Max. :1510.0000 Max. :1392.000
##
## SecurityDelay LateAircraftDelay planeyear
## Min. : 0.0000 Min. : 0.00 Min. :1956
## 1st Qu.: 0.0000 1st Qu.: 0.00 1st Qu.:1992
## Median : 0.0000 Median : 0.00 Median :1999
## Mean : 0.0255 Mean : 4.53 Mean :1997
## 3rd Qu.: 0.0000 3rd Qu.: 0.00 3rd Qu.:2002
## Max. :366.0000 Max. :1366.00 Max. :2007
##
str(carrier)
## 'data.frame': 16633761 obs. of 11 variables:
## $ UniqueCarrier : chr "UA" "UA" "UA" "UA" ...
## $ tailnum : chr "N935UA" "N941UA" "N342UA" "N326UA" ...
## $ DepDelay : int -2 -6 -7 -9 34 102 106 -1 -1 17 ...
## $ ArrDelay : int -18 -23 -24 -33 3 97 85 12 -18 17 ...
## $ Year : int 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 ...
## $ CarrierDelay : int 0 0 0 0 0 23 46 0 0 17 ...
## $ WeatherDelay : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NASDelay : int 0 0 0 0 0 0 0 0 0 0 ...
## $ SecurityDelay : int 0 0 0 0 0 0 0 0 0 0 ...
## $ LateAircraftDelay: int 0 0 0 0 0 74 39 0 0 0 ...
## $ planeyear : num 1992 1992 1988 1988 1990 ...
# Histogram of aircraft manufacture years; the dotted line marks 1980.
plane_yearhist <- plane_year %>%
  ggplot(aes(x = planeyear)) +
  geom_histogram(fill = "steelblue", binwidth = 1) +
  labs(title = "Year of Manufacture", y = "Frequency") +
  geom_vline(xintercept = c(1980), linetype = "dotted") +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.title.x = element_blank()) +
  theme(plot.title = element_text(hjust = 0.5, size = 13, face = "bold")) +
  theme(legend.position = "none")
# Preparing data for line charts: mean departure/arrival delay per aircraft
# age, where age = flight year - manufacture year.
plane_age <- carrier %>%
  mutate(age = Year - planeyear) %>%
  group_by(age) %>%
  summarise(
    mean_depdelay = mean(DepDelay, na.rm = TRUE),
    mean_arrdelay = mean(ArrDelay, na.rm = TRUE)
  )
summary(plane_age$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.00 11.25 24.50 24.50 37.75 51.00
# Keep strictly positive ages only: this removes the negative values and also
# age 0.
plane_age <- plane_age %>%
  filter(age > 0)
# Line chart of Mean departure delay (all planes); dotted line at 25 years
p1 <- ggplot(plane_age, aes(x = age, y = mean_depdelay, group = 1)) +
  geom_line(color = "blue") +
  labs(x = "Years of service", y = "Mean Departure Delay (in minutes)") +
  geom_vline(xintercept = c(25), linetype = "dotted") +
  theme_classic()
# Line chart of Mean arrival delay (all planes); dotted line at 25 years
p2 <- ggplot(plane_age, aes(x = age, y = mean_arrdelay, group = 1)) +
  geom_line(color = "blue") +
  labs(x = "Years of service", y = "Mean Arrival Delay (in minutes)") +
  geom_vline(xintercept = c(25), linetype = "dotted") +
  theme_classic()
# Grid of all plots using `cowplot` package.
# The histogram object is `plane_yearhist`; a bare `hist` here would refer to
# the base graphics function, not to a plot.
bottom_row1 <- plot_grid(p1, p2)
plot_grid(plane_yearhist, bottom_row1, nrow = 2)
# Rename the two mean-delay columns to display names and reshape to long
# format (one row per age and delay type) for a colour-coded line chart.
to_long_delay <- function(data) {
  colnames(data) <- c("age", "Mean Departure Delay", "Mean Arrival Delay")
  melt(data, id.vars = "age")
}

# Two-line chart (departure vs arrival) of mean delay against years of service.
plot_mean_delay <- function(data, title) {
  ggplot(data, aes(x = age, y = value)) +
    geom_line(aes(color = variable)) +
    labs(title = title, x = "Years of service", y = "Mean Delay (in minutes)") +
    scale_color_manual(values = plot_cols) +
    ylim(2, 15) +
    theme_bw() +
    theme(legend.title = element_blank()) +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    theme(plot.title = element_text(hjust = 0.5, size = 13, face = "bold"))
}

plot_cols <- c("#e48f1b", "#619ed6")

# Pre 1980 (Older) Line chart: more than 25 years of service
older_planes <- plane_age %>%
  filter(age > 25)
older_planes <- to_long_delay(older_planes)
p3 <- plot_mean_delay(older_planes, "Planes manufactured before the 1980s")
p3
# Post 1980 (Younger) Line chart: up to and including 25 years of service.
# Age 25 belongs to this group so that it is not dropped from both charts.
younger_planes <- plane_age %>%
  filter(age <= 25)
younger_planes <- to_long_delay(younger_planes)
p4 <- plot_mean_delay(younger_planes, "Planes manufactured after the 1980s")
p4
# Grid plot of both line charts
plot_grid(p3, p4)
# Creating new data frame with mean values of all delay factors per aircraft age
delay_factors <- carrier %>%
  mutate(age = Year - planeyear) %>%
  filter(age > 0) %>%
  group_by(age) %>%
  summarise(
    carrier_delay = mean(CarrierDelay, na.rm = TRUE),
    weather_delay = mean(WeatherDelay, na.rm = TRUE),
    nas_delay = mean(NASDelay, na.rm = TRUE),
    security_delay = mean(SecurityDelay, na.rm = TRUE),
    aircraft_delay = mean(LateAircraftDelay, na.rm = TRUE)
  )
colnames(delay_factors) <- c(
  "age", "Carrier Delay", "Weather Delay", "NAS Delay",
  "Security Delay", "Late Aircraft Delay"
)
# Data preparation: long format, then split ages into two groups
delay_factors <- melt(delay_factors, id.vars = "age")
delay_factors <- delay_factors %>%
  mutate(group = ifelse(age > 25, "Over 25 Years", "Below 25 Years"))
# Average (not sum) the per-age means within each group: the chart is labelled
# "Mean Delay", and the two groups span different numbers of ages, so summed
# values would not be comparable.
delay_factors <- delay_factors %>%
  group_by(group, variable) %>%
  summarise(value = mean(value), .groups = "drop")
# Grouped bar chart. Bars are coloured through the `fill` aesthetic, so the
# palette has to be supplied with scale_fill_manual().
grouped_bar <- delay_factors %>%
  ggplot(aes(fill = group, y = value, x = variable)) +
  geom_bar(position = "dodge", stat = "identity", width = 0.7) +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_manual(values = plot_cols) +
  labs(title = "Mean Delay based on Delay Factors ", x = "", y = "Mean Delay (minutes)") +
  theme_bw() +
  theme(legend.title = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(plot.title = element_text(hjust = 0.5, size = 13, face = "bold"))
grouped_bar
# Mean carrier, NAS and late-aircraft delay per aircraft age (positive ages only)
delay_lines <- carrier %>%
  mutate(age = Year - planeyear) %>%
  filter(age > 0) %>%
  group_by(age) %>%
  summarise(
    mean_carrierdelay = mean(CarrierDelay, na.rm = TRUE),
    mean_NASdelay = mean(NASDelay, na.rm = TRUE),
    mean_aircraftdelay = mean(LateAircraftDelay, na.rm = TRUE)
  )
# One blue line of the column named `y_col` against age, on a shared 0-7 scale
# so the three charts can be compared side by side.
plot_delay_line <- function(y_col, y_title) {
  ggplot(delay_lines, aes(x = age, y = .data[[y_col]], group = 1)) +
    geom_line(color = "blue") +
    ylim(0, 7) +
    labs(x = "Years of service", y = y_title) +
    theme_classic()
}
# 3x1 Line chart
p5 <- plot_delay_line("mean_carrierdelay", "Mean Carrier Delay (in minutes)")
p5
p6 <- plot_delay_line("mean_NASdelay", "Mean NAS Delay (in minutes)")
p6
p7 <- plot_delay_line("mean_aircraftdelay", "Mean Late Aircraft Delay (in minutes)")
p7
# Grid plot of grouped bar chart + 3x1 Line chart
bottom_row2 <- plot_grid(p5, p6, p7, nrow = 1)
plot_grid(grouped_bar, bottom_row2, ncol = 1)
# Removing data frames from Question 2
rm(carrier, plane_age, younger_planes, delay_factors, delay_lines)
# Origin-Dest pairs: number of trips per year for every origin/destination
od_pairs <- years %>%
  group_by(Year, Origin, Dest) %>%
  summarize(num_trips = n()) %>%
  arrange(desc(num_trips))
## `summarise()` has grouped output by 'Year', 'Origin'. You can override using
## the `.groups` argument.
od_pairs$combi <- paste0(od_pairs$Origin, "/", od_pairs$Dest)
od_pairs$combi <- as.factor(od_pairs$combi)
od_pairs$Year <- as.factor(od_pairs$Year)
# New data frame `sum_od`: ONE row per combination holding its total number of
# trips over the three years, limited to the ten busiest combinations.
# (Keeping one row per year and plotting the repeated total with an identity
# bar would stack the same total once per year.)
sum_od <- od_pairs %>%
  group_by(combi) %>%
  summarize(sum_trips = sum(num_trips)) %>%
  arrange(desc(sum_trips)) %>%
  head(n = 10)
# Bar chart
sum_od %>%
  ggplot(aes(y = combi, x = sum_trips)) +
  geom_bar(stat = "identity", width = 0.7, fill = "steelblue") +
  scale_x_continuous(labels = comma) +
  labs(
    title = "Origin-Destination Combinations with most trips",
    x = "Count",
    y = "Origin/Destination"
  ) +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(legend.position = "right")
# Keep only the per-year rows of the ten busiest OD combinations
top_10_combos <- c(
  "SAN/LAX", "OGG/HNL", "LGA/DCA", "LGA/BOS", "LAX/SAN",
  "LAX/LAS", "LAS/LAX", "HNL/OGG", "DCA/LGA", "BOS/LGA"
)
top_od_pairs <- od_pairs %>%
  filter(combi %in% top_10_combos)
plot_cols <- c("#e48f1b", "steelblue", "#aeaeae")
# Horizontal stacked bars: one bar per combination, one segment per year
p1 <- ggplot(top_od_pairs, aes(fill = Year, y = combi, x = num_trips)) +
  geom_bar(position = "stack", stat = "identity", width = 0.7) +
  scale_x_continuous(labels = comma) +
  scale_fill_manual(values = plot_cols) +
  labs(
    title = "Top 10 Origin-Destination Combinations",
    x = "Number of trips",
    y = "Origin/Destination"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 13, face = "bold")
  )
p1
# Manipulating of dataset: one data frame per year holding the trip count of
# every combination in a column named `num_<year>`.
trips_in_year <- function(data, target_year) {
  yearly <- data %>%
    ungroup() %>%
    filter(Year == target_year) %>%
    select(combi, num_trips)
  names(yearly)[names(yearly) == "num_trips"] <- paste0("num_", target_year)
  yearly
}
unusual_pairs <- trips_in_year(od_pairs, 2005)
unusual_pairs1 <- trips_in_year(od_pairs, 2006)
unusual_pairs2 <- trips_in_year(od_pairs, 2007)
# Using inner join to combine all 3 datasets (only combinations flown in all
# three years survive) and retrieving top 10 values by largest yearly swing.
odd_pairs <- unusual_pairs %>%
  inner_join(unusual_pairs1, by = "combi") %>%
  inner_join(unusual_pairs2, by = "combi")
odd_pairs$max <- pmax(odd_pairs$num_2005, odd_pairs$num_2006, odd_pairs$num_2007)
odd_pairs$min <- pmin(odd_pairs$num_2005, odd_pairs$num_2006, odd_pairs$num_2007)
odd_pairs$max_diff <- odd_pairs$max - odd_pairs$min
odd_pairs1 <- odd_pairs %>%
  arrange(desc(max_diff)) %>%
  head(n = 10)
# More data manipulation: keep just the three yearly count columns (selected by
# name, so the helper max/min/max_diff columns never reach the plot), reshape
# to long format and derive the `Year` label from the column name.
year_cols <- c("num_2005", "num_2006", "num_2007")
odd_pairs1 <- odd_pairs1 %>%
  select(combi, all_of(year_cols)) %>%
  melt(id.vars = "combi") %>%
  mutate(Year = sub("num_", "", variable, fixed = TRUE))
# Stacked bar chart
p2 <- odd_pairs1 %>%
  ggplot(aes(fill = Year, y = combi, x = value)) +
  geom_bar(position = "stack", stat = "identity", width = 0.7) +
  scale_x_continuous(labels = comma) +
  scale_fill_manual(values = plot_cols) +
  labs(
    title = "Origin-Destination Combinations with significant changes in traffic",
    x = "Number of trips",
    y = "Origin/Destination"
  ) +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(plot.title = element_text(hjust = 0.5, size = 13, face = "bold"))
p2
# Grid plot using `cowplot` package
plot_grid(p1, p2)
# Number of trips per calendar day for every origin/destination pair
line_pairs <- years %>%
  select(Year, Month, DayofMonth, Origin, Dest) %>%
  group_by(Year, Month, DayofMonth, Origin, Dest) %>%
  summarize(num_trips = n()) %>%
  arrange(desc(num_trips))
## `summarise()` has grouped output by 'Year', 'Month', 'DayofMonth', 'Origin'.
## You can override using the `.groups` argument.
line_pairs$combi <- paste0(line_pairs$Origin, "/", line_pairs$Dest)
line_pairs$combi <- as.factor(line_pairs$combi)
line_pairs$Year <- as.factor(line_pairs$Year)

# Daily rows of the requested combinations, with a real `date` column and the
# first day of that date's month in `month` (the result is grouped by `month`).
daily_trips_for <- function(data, combos) {
  data %>%
    ungroup() %>%
    filter(combi %in% combos) %>%
    mutate(date = as.Date(paste0(Year, "-", Month, "-", DayofMonth))) %>%
    group_by(month = floor_date(date, unit = "month"))
}

# Mean number of daily trips per combination and month.
monthly_mean_trips <- function(daily) {
  daily %>%
    group_by(combi, month) %>%
    summarise(mean_trips = mean(num_trips))
}

# One line per combination of mean daily trips over time.
plot_monthly_traffic <- function(data, title) {
  ggplot(data, aes(x = month, y = mean_trips, color = combi)) +
    geom_line(size = 0.5) +
    scale_color_manual(values = plot_cols2) +
    labs(title = title, x = "", y = "Mean number of trips") +
    theme_bw() +
    theme(legend.title = element_blank()) +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    theme(plot.title = element_text(hjust = 0.5, size = 13, face = "bold"))
}

# Top 5 combinations by traffic
top_5 <- daily_trips_for(
  line_pairs,
  c("SAN/LAX", "LAX/SAN", "LAX/LAS", "LAS/LAX", "BOS/LGA")
)
mean_line_pairs <- monthly_mean_trips(top_5)
## `summarise()` has grouped output by 'combi'. You can override using the
## `.groups` argument.
plot_cols2 <- c("steelblue", "#e48f1b", "#aeaeae", "#f7d027", "steelblue2")
p3 <- plot_monthly_traffic(
  mean_line_pairs,
  "Traffic of Top 5 Origin-Destination Combinations over 3 years"
)
p3
# 5 combinations with the largest change in traffic
change_5 <- daily_trips_for(
  line_pairs,
  c("OGG/HNL", "HNL/OGG", "KOA/HNL", "HNL/LIH", "HNL/KOA")
)
mean_change_pairs <- monthly_mean_trips(change_5)
## `summarise()` has grouped output by 'combi'. You can override using the
## `.groups` argument.
p4 <- plot_monthly_traffic(
  mean_change_pairs,
  "Traffic of Origin-Destination Combinations with significant increases over 3 years"
)
p4
# Grid plot using `cowplot` package
plot_grid(p3, p4)
# Removing data frames from Question 3
# (object names spelled exactly as created above: `unusual_pairs2`, `top_5`)
rm(od_pairs, sum_od, top_od_pairs, unusual_pairs, unusual_pairs1, unusual_pairs2, odd_pairs, odd_pairs1, line_pairs, top_5)
# Setting variables as factors
years[c("Month", "DayofMonth")] <- lapply(years[c("Month", "DayofMonth")], as.factor)
# "Yes" when the delay is strictly positive, otherwise "No"; a missing delay
# counts as "No".
flag_delay <- function(delay_minutes) {
  as.factor(ifelse((delay_minutes > 0) %in% TRUE, "Yes", "No"))
}
# Adding new columns to data for departure / arrival delay status
years$dep_delayed <- flag_delay(years$DepDelay)
years$arr_delayed <- flag_delay(years$ArrDelay)
summary(years$dep_delayed)
## No Yes
## 13227403 8508330
summary(years$arr_delayed)
## No Yes
## 12021690 9714043
# plot for delay rate per month (from question 1)
month_label <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
# A flight counts as delayed here when it departed late OR arrived late.
month_delay <- years %>%
  select(Month, dep_delayed, arr_delayed) %>%
  group_by(Month) %>%
  summarise(
    num_delays = sum(dep_delayed == "Yes" | arr_delayed == "Yes"),
    num_flights = n(),
    delay_rate = num_delays / num_flights
  )
# `delay_rate` is a proportion of flights, so the axis carries no time unit.
p1 <- month_delay %>%
  ggplot(aes(x = Month, y = delay_rate, group = 1)) +
  geom_line(color = "steelblue") +
  geom_point(color = "steelblue") +
  scale_x_discrete(labels = month_label) +
  labs(title = "Delay Rate per Month", y = "Delay Rate") +
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12),
    axis.title.x = element_blank()
  )
p1
# Number of flights for every month / year combination
bar_grouped <- years %>%
  group_by(Month, Year) %>%
  summarise(num_flights = n())
## `summarise()` has grouped output by 'Month'. You can override using the
## `.groups` argument.
bar_grouped$Year <- factor(bar_grouped$Year)
plot_cols <- c("#e48f1b", "steelblue", "#aeaeae")
# Stacked bars: one bar per month, one segment per year
p2 <- ggplot(bar_grouped, aes(x = Month, y = num_flights, fill = Year)) +
  geom_bar(position = "stack", stat = "identity", width = 0.7) +
  scale_fill_manual(values = plot_cols) +
  scale_x_discrete(labels = month_label) +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Number of flights", x = "", y = "Frequency") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 13, face = "bold")
  )
# Grid plot using `cowplot` package
plot_grid(p1, p2, align = "h")
# Daily arrival and departure delay rates for December, in long format.
# The object keeps the name `first_month` although it holds December data,
# because the plot and the clean-up step further down refer to it by that name.
first_month <- years %>%
  filter(Month == 12) %>%
  group_by(DayofMonth) %>%
  summarise(num_flights = n(),
            arr_rate = sum(arr_delayed == "Yes") / num_flights,
            dep_rate = sum(dep_delayed == "Yes") / num_flights) %>%
  # Pick the rate columns by name instead of by position, and give them the
  # labels that appear in the plot legend
  select(DayofMonth,
         `Arrival Delay Rate` = arr_rate,
         `Departure Delay Rate` = dep_rate)
first_month <- melt(first_month, id.vars = "DayofMonth")
str(first_month)
## 'data.frame': 62 obs. of 3 variables:
## $ DayofMonth: Factor w/ 31 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ variable : Factor w/ 2 levels "Arrival Delay Rate",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ value : num 0.48 0.488 0.432 0.479 0.454 ...
# Line chart with one line per delay-rate series across the days of December
p3 <- first_month %>%
  ggplot(aes(x = DayofMonth, y = value, group = variable, color = variable)) +
  geom_line() +
  scale_color_manual(values = plot_cols) +
  labs(title = "Delay Rate in December", x = "", y = "") +
  theme_bw() +
  theme(legend.title = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 13, face = "bold"))
p3
# Logical data cleaning: actual departure times on 22 and 23 December are
# turned into hourly labels ("0100" ... "2400"). Times with one or two digits
# are discarded by the nchar() filter.
# NOTE(review): signif() rounds HHMM as a decimal number, so e.g. 0145 becomes
# "0100" rather than "0200" - confirm this bucketing is what is wanted.
day_aug <- years %>%
  select(Month, DayofMonth, DepTime, CRSDepTime, DepDelay, dep_delayed, arr_delayed) %>%
  filter(Month == 12, DayofMonth == 22 | DayofMonth == 23) %>%
  filter(!is.na(DepTime), nchar(DepTime) > 2)
# 3-digit times: round to one significant digit, then look the label up;
# anything that is not 100, 200, ..., 900 after rounding is labelled "1000"
day_aug1 <- day_aug %>%
  filter(nchar(DepTime) == 3) %>%
  mutate(DepTime = coalesce(
    sprintf("%02d00", 1:9)[match(signif(DepTime, 1), 1:9 * 100)],
    "1000"
  ))
# 4-digit times: round to two significant digits; values above 2400 wrap
# around to the following day by subtracting 2400 and prefixing "0"
day_aug2 <- day_aug %>%
  filter(nchar(DepTime) == 4) %>%
  mutate(DepTime = signif(DepTime, 2),
         DepTime = if_else(DepTime > 2400,
                           paste0("0", DepTime - 2400),
                           as.character(DepTime)))
day_aug <- rbind(day_aug1, day_aug2)
day_aug[["DepTime"]] <- factor(day_aug[["DepTime"]])
summary(day_aug$DepTime)
## 0100 0200 0300 0400 0500 0600 0700 0800 0900 1000 1100 1200 1300 1400 1500 1600
## 245 98 31 18 543 6594 7144 7454 6971 6898 7034 7066 6739 6944 6794 6998
## 1700 1800 1900 2000 2100 2200 2300 2400
## 7159 6980 6445 5518 4567 2923 1544 223
# Arrival and departure delay rates per departure hour and day (22 and 23
# December), reshaped to long format for plotting.
day_delay <- day_aug %>%
  group_by(DepTime, DayofMonth) %>%
  summarise(num_flights = n(),
            arr_rate = sum(arr_delayed == "Yes") / num_flights,
            dep_rate = sum(dep_delayed == "Yes") / num_flights,
            # return an ungrouped table so later steps do not inherit groups
            .groups = "drop") %>%
  # Select by name rather than by column position; DepTime is already a
  # factor, so it needs no conversion to character and back
  select(DepTime, DayofMonth,
         `Arrival Delay Rate` = arr_rate,
         `Departure Delay Rate` = dep_rate)
day_delay <- melt(day_delay, id.vars = c("DepTime", "DayofMonth"))
str(day_delay)
## 'data.frame': 96 obs. of 4 variables:
## $ DepTime : Factor w/ 24 levels "0100","0200",..: 1 1 2 2 3 3 4 4 5 5 ...
## $ DayofMonth: Factor w/ 31 levels "1","2","3","4",..: 22 23 22 23 22 23 22 23 22 23 ...
## $ variable : Factor w/ 2 levels "Arrival Delay Rate",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ value : num 0.909 0.926 0.861 0.919 0.833 ...
# Delay rates by departure hour on 22 December, one line per series
p4 <- day_delay %>%
  filter(DayofMonth == 22) %>%
  arrange(DepTime) %>%
  ggplot(aes(x = DepTime, y = value, group = variable)) +
  geom_line(aes(color = variable)) +
  labs(title = "Delay Rate on 22nd December", x = "", y = "") +
  scale_color_manual(values = plot_cols) +
  theme_bw() +
  theme(legend.title = element_blank()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(plot.title = element_text(hjust = 0.5, size = 13, face = "bold"))
p4
# Grid plot using the `cowplot` package: monthly view above the single-day view
plot_grid(p3, p4, ncol = 1)
# Flights on 22 December 2005 that were flagged as delayed on both departure
# and arrival, with an actual departure time strictly between 1100 and 1300
check <- years %>%
  select(Year, Month, DayofMonth, CRSDepTime, DepTime, CRSArrTime, ArrTime,
         TailNum, FlightNum, Origin, Dest, arr_delayed, dep_delayed) %>%
  filter(Year == 2005, Month == 12, DayofMonth == 22,
         arr_delayed == "Yes", dep_delayed == "Yes",
         DepTime > 1100, DepTime < 1300)
# Found 2 examples of 1st degree cascading failures due to delays
check <- filter(check, TailNum %in% c("N957SW", "N835AE"))
summary(check)
## Year Month DayofMonth CRSDepTime DepTime CRSArrTime
## 2005:4 12 :4 22 :4 Min. :1010 Min. :1105 Min. :1116
## 2006:0 1 :0 1 :0 1st Qu.:1014 1st Qu.:1105 1st Qu.:1119
## 2007:0 2 :0 2 :0 Median :1080 Median :1172 Median :1268
## 3 :0 3 :0 Mean :1082 Mean :1176 Mean :1276
## 4 :0 4 :0 3rd Qu.:1148 3rd Qu.:1242 3rd Qu.:1426
## 5 :0 5 :0 Max. :1155 Max. :1253 Max. :1454
## (Other):0 (Other):0
## ArrTime TailNum FlightNum Origin
## Min. :1213 Length:4 Min. :4688 Length:4
## 1st Qu.:1215 Class :character 1st Qu.:4688 Class :character
## Median :1332 Mode :character Median :5559 Mode :character
## Mean :1357 Mean :5612
## 3rd Qu.:1474 3rd Qu.:6483
## Max. :1550 Max. :6642
##
## Dest arr_delayed dep_delayed
## Length:4 No :0 No :0
## Class :character Yes:4 Yes:4
## Mode :character
##
##
##
##
# Removing data frames from Question 4. day_aug2 is listed as well: it is
# created alongside day_aug1 and would otherwise stay in the workspace.
rm(month_delay, bar_grouped, first_month, day_aug, day_aug1, day_aug2,
   day_delay, check)
I will be building a model to predict DepDelay, or
departure delay.
# Count missing entries per column, treating both NA and "" as missing
colSums(is.na(years) | years == "")
## Year Month DayofMonth DayOfWeek
## 0 0 0 0
## DepTime CRSDepTime ArrTime CRSArrTime
## 416412 0 463805 0
## UniqueCarrier FlightNum TailNum ActualElapsedTime
## 0 0 22 463805
## CRSElapsedTime AirTime ArrDelay DepDelay
## 998 463805 463805 416412
## Origin Dest Distance TaxiIn
## 0 0 0 0
## TaxiOut Cancelled CancellationCode Diverted
## 0 0 21319319 0
## CarrierDelay WeatherDelay NASDelay SecurityDelay
## 0 0 0 0
## LateAircraftDelay status dep_delayed arr_delayed
## 0 0 0 0
# Creating new feature for status of flights.
# Branches are checked top to bottom, so "Cancelled" takes precedence over
# "Delayed", which takes precedence over "Diverted"; a flight is "On Time"
# only when it is none of these and its departure delay is <= 0. Rows that
# match no branch (e.g. missing DepDelay) stay NA.
years$status <- as.factor(case_when(
  years$Cancelled == 1 ~ "Cancelled",
  years$DepDelay > 0 ~ "Delayed",
  years$Diverted == 1 ~ "Diverted",
  years$Diverted != 1 & years$DepDelay <= 0 & years$Cancelled != 1 ~ "On Time",
  TRUE ~ NA_character_
))
# CRSDepTime and CRSArrTime
summary(years$CRSDepTime) # Based on the data frame, a timing of "0" refers to 24:00, or 00:00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 930 1325 1334 1721 2359
summary(years$CRSArrTime) # Based on the data frame, a timing of "0" refers to 24:00, or 00:00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 1116 1520 1497 1909 2400
# Working table for the machine-learning section: keeps a few columns for
# convenience and drops flights with no recorded departure delay
# (per the original note, DepDelay is NA because the flight was cancelled)
main <- years %>%
  select(TailNum, Year, CRSDepTime, CRSArrTime, DepDelay, CRSElapsedTime, status) %>%
  filter(!is.na(DepDelay))
summary(main)
## TailNum Year CRSDepTime CRSArrTime
## Length:21319321 2005:7006866 Min. : 0 Min. : 0
## Class :character 2006:7019988 1st Qu.: 930 1st Qu.:1115
## Mode :character 2007:7292467 Median :1325 Median :1520
## Mean :1333 Mean :1496
## 3rd Qu.:1720 3rd Qu.:1908
## Max. :2359 Max. :2400
##
## DepDelay CRSElapsedTime status
## Min. :-1200.00 Min. : -97.0 Cancelled: 0
## 1st Qu.: -4.00 1st Qu.: 76.0 Delayed : 8508330
## Median : 0.00 Median : 109.0 Diverted : 21996
## Mean : 10.07 Mean : 127.1 On Time :12788995
## 3rd Qu.: 9.00 3rd Qu.: 157.0
## Max. : 2601.00 Max. :1031.0
## NA's :727
## Analysis:
## Single-digit CRSDepTime values represent the minutes in the hour 2400, e.g. CRSDepTime == 8 means 2408; the same applies to DepTime. Timings over 2400 indicate the next day -- hence take (Time - 2400)
Data cleaning for CRSDepTime and
CRSArrTime
# Turn scheduled arrival/departure times into hourly factor labels.
# Each crsN table is `main` filtered by the digit count of one time column,
# with that column replaced by its label; `arr` and `dep` stack the pieces.
# NOTE(review): signif() rounds HHMM as a decimal number, so e.g. 0145 becomes
# "0100" rather than "0200" - confirm this matches "nearest hour" as intended.

# Helper: label for a 3-digit time after rounding to one significant digit
# ("0100" ... "0900"; any other rounded value is labelled "1000")
label_three_digit <- function(hhmm) {
  rounded <- signif(hhmm, 1)
  coalesce(sprintf("%02d00", 1:9)[match(rounded, 1:9 * 100)], "1000")
}

# 3-digit timings -> factor from **0100 to 1000**
crs1 <- main %>%
  filter(nchar(CRSArrTime) == 3) %>%
  mutate(CRSArrTime = factor(label_three_digit(CRSArrTime)))
crs2 <- main %>%
  filter(nchar(CRSDepTime) == 3) %>%
  mutate(CRSDepTime = factor(label_three_digit(CRSDepTime)))

# 4-digit timings -> rounded to two significant digits, factor from
# **1000 to 2400**; rows are sorted by the raw time before rounding
crs3 <- main %>%
  filter(nchar(CRSDepTime) == 4) %>%
  arrange(CRSDepTime) %>%
  mutate(CRSDepTime = factor(signif(CRSDepTime, 2)))
crs4 <- main %>%
  filter(nchar(CRSArrTime) == 4) %>%
  arrange(CRSArrTime) %>%
  mutate(CRSArrTime = factor(signif(CRSArrTime, 2)))

# 1- and 2-digit timings -> all labelled **2400**
crs5 <- main %>%
  filter(nchar(CRSDepTime) %in% c(1, 2)) %>%
  mutate(CRSDepTime = factor(rep("2400", n())))
crs6 <- main %>%
  filter(nchar(CRSArrTime) %in% c(1, 2)) %>%
  mutate(CRSArrTime = factor(rep("2400", n())))

# Binding of all dataframes together
arr <- rbind(crs1, crs4, crs6)
dep <- rbind(crs2, crs3, crs5)
rm(crs1, crs2, crs3, crs4, crs5, crs6, label_three_digit)
CRSDepTime and CRSArrTime against mean departure delay rates
# CRSDepTime against mean departure delay rates
# Mean departure delay for every scheduled-departure hour label
dep1 <- dep %>%
  group_by(CRSDepTime) %>%
  summarise(mean_delay = mean(DepDelay, na.rm = TRUE))
# CRSArrTime against mean departure delay rates
arr1 <- arr %>%
  group_by(CRSArrTime) %>%
  summarise(mean_delay = mean(DepDelay, na.rm = TRUE)) %>%
  arrange(CRSArrTime)
# Both panels share the same layout: line + points, y axis fixed to 0-20,
# vertical hour labels and no x-axis title
p1 <- dep1 %>%
  ggplot(aes(x = CRSDepTime, y = mean_delay, group = 1)) +
  geom_line(color = "steelblue", size = 0.7) +
  geom_point(color = "steelblue") +
  ylim(0, 20) +
  labs(title = "Scheduled Departure Time", x = "", y = "Mean Delay (minutes)") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12),
        axis.title.x = element_blank())
p2 <- arr1 %>%
  ggplot(aes(x = CRSArrTime, y = mean_delay, group = 1)) +
  geom_line(color = "steelblue", size = 0.7) +
  geom_point(color = "steelblue") +
  ylim(0, 20) +
  labs(title = "Scheduled Arrival Time", x = "", y = "Mean Delay (minutes)") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12),
        axis.title.x = element_blank())
# Shared heading drawn above the two panels; each panel title completes it
title <- ggdraw() +
  draw_label("Mean Departure Delay based on", fontface = "bold", size = 17)
p <- plot_grid(p1, p2)
plot_grid(title, p, ncol = 1, rel_heights = c(0.1, 1))
Age of planes
(See question 2 plot) I will be including the age of planes, as I have concluded that the delay rate of planes increases and peaks when a plane is at 25 years of service, and decreases thereafter.
# CRSElapsedTime
# Take the first 100,000 rows of `main` as a proportion of the data and drop
# rows without a scheduled elapsed time
crs_elapsed <- head(main, 100000) %>%
  drop_na(CRSElapsedTime)
# Scatterplot of scheduled elapsed time (CRSElapsedTime) against departure
# delay (DepDelay)
ggplot(crs_elapsed, aes(x = DepDelay, y = CRSElapsedTime)) +
  geom_point(alpha = 0.2, size = 0.3) +
  labs(title = "Scatter Plot of Scheduled Elapsed Time against Departure Delay (minutes)",
       x = "Departure Delay", y = "Scheduled Elapsed Time") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5, vjust = 1, size = 12))
CRSDepTime, CRSArrTime, age (of plane) and CRSElapsedTime are the variables
that will be included. Plane manufacturer can’t be used as it contains too many NAs; imputing a large proportion of the dataset would cause inaccuracy.
# original
# Year of manufacture for each aircraft, keyed by tail number
original_plane <- planes %>%
  select(tailnum, year)
dep <- subset(dep, select = c("TailNum", "Year", "CRSDepTime", "DepDelay", "CRSElapsedTime"))
arr <- subset(arr, select = c("TailNum", "Year", "CRSArrTime", "DepDelay", "CRSElapsedTime"))

# Build the modelling table with exactly one row per flight.
# `dep` and `arr` contain the same flights in different row orders and share
# no unique flight key, so joining them on TailNum / Year / DepDelay /
# CRSElapsedTime pairs every flight with every other flight that has the same
# key values and multiplies the row count. Both hour labels are therefore
# derived directly from `main`, with the same rules as the CRSDepTime /
# CRSArrTime cleaning: 1-2 digits -> "2400"; 3 digits -> signif(x, 1);
# 4 digits -> signif(x, 2).
hour_levels <- sprintf("%02d00", 1:24)
bucket_hhmm <- function(hhmm) {
  digits <- nchar(hhmm)
  labels <- case_when(
    digits <= 2 ~ "2400",
    digits == 3 ~ sprintf("%04d", as.integer(signif(hhmm, 1))),
    TRUE ~ sprintf("%04d", as.integer(signif(hhmm, 2)))
  )
  # Fixed level order "0100" ... "2400", so the integer codes run from 1 to 24
  factor(labels, levels = hour_levels)
}
original <- main %>%
  # Column order matters: later steps expect tailnum, Year, CRSDepTime,
  # DepDelay, CRSElapsedTime, CRSArrTime, then the plane's year
  select(tailnum = TailNum, Year, CRSDepTime, DepDelay, CRSElapsedTime, CRSArrTime) %>%
  mutate(CRSDepTime = bucket_hhmm(CRSDepTime),
         CRSArrTime = bucket_hhmm(CRSArrTime)) %>%
  # assumes each tailnum appears once in `planes` - TODO confirm
  inner_join(original_plane, by = "tailnum")
str(original)
## 'data.frame': 76736905 obs. of 7 variables:
## $ tailnum : chr "N427UA" "N449UA" "N449UA" "N433UA" ...
## $ Year : Factor w/ 3 levels "2005","2006",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ CRSDepTime : Factor w/ 24 levels "0100","0200",..: 8 8 8 8 8 8 6 6 6 6 ...
## $ DepDelay : int -4 0 0 -6 13 2 14 3 -4 -5 ...
## $ CRSElapsedTime: int 197 197 197 197 197 197 112 112 112 112 ...
## $ CRSArrTime : Factor w/ 24 levels "0100","0200",..: 11 11 16 11 11 11 7 7 7 7 ...
## $ year : num 1995 1998 1998 1996 1995 ...
# Flight year goes factor -> character -> number so the label (e.g. 2005) is
# kept rather than the factor code; the plane's year is coerced to numeric
original <- original %>%
  mutate(Year = as.numeric(as.character(Year)),
         year = as.numeric(year))
summary(original) # year(plane) has '0' and several NA's
## tailnum Year CRSDepTime DepDelay
## Length:76736905 Min. :2005 0800 : 6225911 Min. :-1200.00
## Class :character 1st Qu.:2005 0700 : 5748525 1st Qu.: -4.00
## Mode :character Median :2006 1000 : 5502320 Median : 0.00
## Mean :2006 0600 : 5491855 Mean : 1.18
## 3rd Qu.:2007 0900 : 5209006 3rd Qu.: 0.00
## Max. :2007 1200 : 5096041 Max. : 2601.00
## (Other):43463247
## CRSElapsedTime CRSArrTime year
## Min. : -97.00 1000 : 6101454 Min. : 0
## 1st Qu.: 60.00 0800 : 5837636 1st Qu.:1994
## Median : 75.00 1200 : 5645913 Median :1997
## Mean : 86.89 0900 : 5303718 Mean :1994
## 3rd Qu.: 100.00 1400 : 5091362 3rd Qu.:2001
## Max. :1031.00 1600 : 5084999 Max. :2007
## NA's :1 (Other):43671823
# Imputing median value for missing plane year and scheduled elapsed time,
# then deriving the plane's age at the time of the flight.
# NOTE(review): the median of `year` is taken before the year > 0 filter, so
# any 0 entries take part in it - confirm that is acceptable.
# NOTE(review): age > 0 removes age == 0 as well as negative ages - confirm.
original <- original %>%
  mutate(year = replace(year, is.na(year), median(year, na.rm = TRUE)),
         CRSElapsedTime = replace(CRSElapsedTime, is.na(CRSElapsedTime),
                                  median(CRSElapsedTime, na.rm = TRUE))) %>%
  filter(year > 0) %>%
  mutate(age = Year - year) %>%
  filter(age > 0) # There will be several negative values, also a small proportion, cleaning up data
# Encoding factors: character columns become factors first, then every factor
# column is replaced by its integer level code stored as a number
original <- original %>%
  mutate(across(where(is.character), as.factor)) %>%
  mutate(across(where(is.factor), as.numeric))
summary(original)
## tailnum Year CRSDepTime DepDelay
## Min. : 1 Min. :2005 Min. : 1.00 Min. :-1200.000
## 1st Qu.:1618 1st Qu.:2005 1st Qu.: 9.00 1st Qu.: -4.000
## Median :2357 Median :2006 Median :12.00 Median : 0.000
## Mean :2229 Mean :2006 Mean :12.72 Mean : 1.159
## 3rd Qu.:2798 3rd Qu.:2007 3rd Qu.:17.00 3rd Qu.: 0.000
## Max. :4171 Max. :2007 Max. :24.00 Max. : 2601.000
## CRSElapsedTime CRSArrTime year age
## Min. : -97.00 Min. : 1.00 Min. :1956 Min. : 1.000
## 1st Qu.: 60.00 1st Qu.:10.00 1st Qu.:1994 1st Qu.: 5.000
## Median : 75.00 Median :14.00 Median :1997 Median : 8.000
## Mean : 86.62 Mean :14.07 Mean :1996 Mean : 9.768
## 3rd Qu.: 100.00 3rd Qu.:18.00 3rd Qu.:2001 3rd Qu.:13.000
## Max. :1031.00 Max. :24.00 Max. :2006 Max. :51.000
# Checking for missing values: count entries per column that are NA or ""
colSums(is.na(original) | original == "")
## tailnum Year CRSDepTime DepDelay CRSElapsedTime
## 0 0 0 0 0
## CRSArrTime year age
## 0 0 0
str(original)
## 'data.frame': 76010596 obs. of 8 variables:
## $ tailnum : num 1496 1606 1606 1524 1490 ...
## $ Year : num 2005 2005 2005 2005 2005 ...
## $ CRSDepTime : num 8 8 8 8 8 8 6 6 6 6 ...
## $ DepDelay : int -4 0 0 -6 13 2 14 3 -4 -5 ...
## $ CRSElapsedTime: num 197 197 197 197 197 197 112 112 112 112 ...
## $ CRSArrTime : num 11 11 16 11 11 11 7 7 7 7 ...
## $ year : num 1995 1998 1998 1996 1995 ...
## $ age : num 10 7 7 9 10 7 14 13 14 17 ...
# Subsetting to columns needed: the target (DepDelay) and the four features.
# Columns are chosen by name so the result does not depend on their position.
original <- original[c("CRSDepTime", "DepDelay", "CRSElapsedTime", "CRSArrTime", "age")]
str(original)
## 'data.frame': 76010596 obs. of 5 variables:
## $ CRSDepTime : num 8 8 8 8 8 8 6 6 6 6 ...
## $ DepDelay : int -4 0 0 -6 13 2 14 3 -4 -5 ...
## $ CRSElapsedTime: num 197 197 197 197 197 197 112 112 112 112 ...
## $ CRSArrTime : num 11 11 16 11 11 11 7 7 7 7 ...
## $ age : num 10 7 7 9 10 7 14 13 14 17 ...
# Wrap the modelling table in an mlr3 regression task with DepDelay as target
task_original <- TaskRegr$new(original, id = "Delay", target = "DepDelay")
print(task_original)
## <TaskRegr:Delay> (76010596 x 5)
## * Target: DepDelay
## * Properties: -
## * Features (4):
## - dbl (4): CRSArrTime, CRSDepTime, CRSElapsedTime, age
task_original$feature_names
## [1] "CRSArrTime" "CRSDepTime" "CRSElapsedTime" "age"
task_original$target_names
## [1] "DepDelay"
# Restrict the task to rows 1 to 100000 (modifies the task in place).
# NOTE(review): these are the first rows in table order, not a random sample -
# confirm that the row order does not bias the subset.
task_original$filter(rows = 1:100000)
# NOTE(review): DepDelay is the target and is not among the feature names
# printed above, so this select() looks like a no-op - confirm.
task_original$select(setdiff(task_original$feature_names, "DepDelay"))
# Mean squared error; this measure object is handed to the AutoTuner calls
# further down
measure <- msr("regr.mse")
# Baseline: linear regression behind an "imputemean" preprocessing step
learner_lm <- lrn("regr.lm")
gr_lm <- po("imputemean") %>>%
po(learner_lm)
glrn_lm <- GraphLearner$new(gr_lm)
# 70/30 train/test split over the task's row positions; the seed is fixed so
# the same split is drawn on every run and reused by the later models
set.seed(1)
train_set <- sample(task_original$nrow, 0.7 * task_original$nrow)
test_set <- setdiff(seq_len(task_original$nrow), train_set)
glrn_lm$train(task_original, row_ids = train_set)
# score() is called without a measure; the output below reports regr.mse
glrn_lm$predict(task_original, row_ids = test_set)$score()
## regr.mse
## 78.64405
### regr.mse = 70.12866
# glmnet learner with alpha = 0 and a fixed lambda of 0.001
# (presumably ridge regression, going by the object names - confirm against
# the glmnet documentation for `alpha`)
learner_ridge <- lrn("regr.glmnet")
# NOTE(review): assigning to param_set$values appears to replace the whole
# list of set values rather than add to it - confirm no learner defaults that
# are needed get dropped here.
learner_ridge$param_set$values <- list(alpha = 0, lambda = 0.001)
# Pipeline: "scale" step, then "imputemean" step, then the learner
gr_ridge <- po("scale") %>>%
po("imputemean") %>>%
po(learner_ridge)
glrn_ridge <- GraphLearner$new(gr_ridge)
# Same train/test row ids as the linear model, so the scores are comparable
glrn_ridge$train(task_original, row_ids = train_set)
glrn_ridge$predict(task_original, row_ids = test_set)$score()
## regr.mse
## 78.64404
### regr.mse = 70.12547
# glmnet learner with alpha = 0 whose lambda is chosen by grid search with
# 3-fold cross-validation on the training rows.
learner_ridge2 <- lrn("regr.glmnet")
# Only alpha is fixed here; lambda is left for the tuner to set
learner_ridge2$param_set$values <- list(alpha = 0)
gr_ridge2 <- po("scale") %>>%
  po("imputemean") %>>%
  po(learner_ridge2)
glrn_ridge2 <- GraphLearner$new(gr_ridge2)
# Search space for lambda. Inside the graph learner the parameter id carries
# the "regr.glmnet." prefix. ps()/p_dbl() are paradox's constructor helpers
# and replace the older ParamSet$new(list(ParamDbl$new(...))) form.
tune_lambda <- ps(
  regr.glmnet.lambda = p_dbl(lower = 0.03, upper = 2)
)
tuner <- tnr("grid_search")
# Upper bound on evaluations; the log below shows the grid search finishing
# after 10 evaluations, i.e. before this budget is reached
terminator <- trm("evals", n_evals = 20)
at_ridge <- AutoTuner$new(
  learner = glrn_ridge2,
  resampling = rsmp("cv", folds = 3),
  measure = measure,
  search_space = tune_lambda,
  terminator = terminator,
  tuner = tuner
)
at_ridge$train(task_original, row_ids = train_set)
## INFO [04:04:55.837] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]'
## INFO [04:04:55.938] [bbotk] Evaluating 1 configuration(s)
## INFO [04:04:56.069] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:04:56.232] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:05:12.954] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:05:26.835] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:05:40.975] [mlr3] Finished benchmark
## INFO [04:05:41.182] [bbotk] Result of batch 1:
## INFO [04:05:41.206] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:05:41.206] [bbotk] 0.03 101.3591 0 0 44.568
## INFO [04:05:41.206] [bbotk] uhash
## INFO [04:05:41.206] [bbotk] 1bb99e62-a930-437e-9cb9-1683654c1b8a
## INFO [04:05:41.214] [bbotk] Evaluating 1 configuration(s)
## INFO [04:05:41.399] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:05:41.431] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:05:54.924] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:06:08.061] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:06:19.847] [mlr3] Finished benchmark
## INFO [04:06:19.903] [bbotk] Result of batch 2:
## INFO [04:06:19.905] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:06:19.905] [bbotk] 1.124444 101.368 0 0 38.343
## INFO [04:06:19.905] [bbotk] uhash
## INFO [04:06:19.905] [bbotk] e3badafa-2a97-4b7b-b141-75534e353e67
## INFO [04:06:19.907] [bbotk] Evaluating 1 configuration(s)
## INFO [04:06:19.975] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:06:19.985] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:06:35.305] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:06:48.452] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:07:04.378] [mlr3] Finished benchmark
## INFO [04:07:04.455] [bbotk] Result of batch 3:
## INFO [04:07:04.459] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:07:04.459] [bbotk] 2 101.3841 0 0 44.323
## INFO [04:07:04.459] [bbotk] uhash
## INFO [04:07:04.459] [bbotk] fb80b5f5-470b-4c17-9cb9-f6936a4f77e8
## INFO [04:07:04.461] [bbotk] Evaluating 1 configuration(s)
## INFO [04:07:04.562] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:07:04.578] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:07:17.643] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:07:31.230] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:07:44.274] [mlr3] Finished benchmark
## INFO [04:07:44.433] [bbotk] Result of batch 4:
## INFO [04:07:44.438] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:07:44.438] [bbotk] 0.9055556 101.3651 0 0 39.624
## INFO [04:07:44.438] [bbotk] uhash
## INFO [04:07:44.438] [bbotk] 0fa39165-eab7-4739-95da-09a421dde0dc
## INFO [04:07:44.443] [bbotk] Evaluating 1 configuration(s)
## INFO [04:07:44.517] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:07:44.528] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:07:56.502] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:08:15.630] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:08:31.694] [mlr3] Finished benchmark
## INFO [04:08:31.868] [bbotk] Result of batch 5:
## INFO [04:08:31.873] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:08:31.873] [bbotk] 1.781111 101.3795 0 0 47.062
## INFO [04:08:31.873] [bbotk] uhash
## INFO [04:08:31.873] [bbotk] e9fbbeae-17c3-4a36-96b5-cc353e752237
## INFO [04:08:31.880] [bbotk] Evaluating 1 configuration(s)
## INFO [04:08:32.021] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:08:32.040] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:08:47.770] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:09:02.833] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:09:16.761] [mlr3] Finished benchmark
## INFO [04:09:16.884] [bbotk] Result of batch 6:
## INFO [04:09:16.888] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:09:16.888] [bbotk] 0.4677778 101.3607 0 0 44.606
## INFO [04:09:16.888] [bbotk] uhash
## INFO [04:09:16.888] [bbotk] 7dd216bc-59e2-465f-9ac4-b26bd817dba1
## INFO [04:09:16.892] [bbotk] Evaluating 1 configuration(s)
## INFO [04:09:17.002] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:09:17.021] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:09:32.574] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:09:45.321] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:10:05.538] [mlr3] Finished benchmark
## INFO [04:10:05.708] [bbotk] Result of batch 7:
## INFO [04:10:05.713] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:10:05.713] [bbotk] 1.343333 101.3715 0 0 48.366
## INFO [04:10:05.713] [bbotk] uhash
## INFO [04:10:05.713] [bbotk] 08a19f0b-4ebc-44cc-8448-9c30988dcabb
## INFO [04:10:05.717] [bbotk] Evaluating 1 configuration(s)
## INFO [04:10:05.822] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:10:05.840] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:10:21.067] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:10:35.523] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:10:49.160] [mlr3] Finished benchmark
## INFO [04:10:49.243] [bbotk] Result of batch 8:
## INFO [04:10:49.246] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:10:49.246] [bbotk] 0.2488889 101.3595 0 0 43.264
## INFO [04:10:49.246] [bbotk] uhash
## INFO [04:10:49.246] [bbotk] 44902c76-c509-472c-be50-cc5581639d3b
## INFO [04:10:49.248] [bbotk] Evaluating 1 configuration(s)
## INFO [04:10:49.312] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:10:49.321] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:11:03.151] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:11:18.961] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:11:32.642] [mlr3] Finished benchmark
## INFO [04:11:32.771] [bbotk] Result of batch 9:
## INFO [04:11:32.775] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:11:32.775] [bbotk] 1.562222 101.3753 0 0 43.207
## INFO [04:11:32.775] [bbotk] uhash
## INFO [04:11:32.775] [bbotk] 47d1ecd9-10d2-47e1-89e2-85ed4cfcb4f2
## INFO [04:11:32.778] [bbotk] Evaluating 1 configuration(s)
## INFO [04:11:32.878] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:11:32.895] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:11:49.071] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:12:03.173] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:12:28.475] [mlr3] Finished benchmark
## INFO [04:12:28.537] [bbotk] Result of batch 10:
## INFO [04:12:28.539] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:12:28.539] [bbotk] 0.6866667 101.3626 0 0 55.502
## INFO [04:12:28.539] [bbotk] uhash
## INFO [04:12:28.539] [bbotk] 45892faf-acb2-4e5e-b831-f0c892d5d99d
## INFO [04:12:28.547] [bbotk] Finished optimizing after 10 evaluation(s)
## INFO [04:12:28.548] [bbotk] Result:
## INFO [04:12:28.550] [bbotk] regr.glmnet.lambda learner_param_vals x_domain regr.mse
## INFO [04:12:28.550] [bbotk] 0.03 <list[3]> <list[1]> 101.3591
# Test-set score of the tuned ridge model; the output below reports regr.mse
at_ridge$predict(task_original, row_ids = test_set)$score()
## regr.mse
## 78.64359
### regr.mse = 70.13002
# Random forest (ranger) whose number of trees is chosen by grid search with
# 3-fold cross-validation on the training rows.
learner_rf <- lrn("regr.ranger")
# NOTE(review): assigning to param_set$values appears to replace the whole
# list of set values rather than add to it - confirm no learner defaults that
# are needed get dropped here.
learner_rf$param_set$values <- list(min.node.size = 4)
gr_rf <- po("scale") %>>%
  po("imputemean") %>>%
  po(learner_rf)
glrn_rf <- GraphLearner$new(gr_rf)
# Search space for the number of trees. Inside the graph learner the
# parameter id carries the "regr.ranger." prefix. ps()/p_int() are paradox's
# constructor helpers and replace the older
# ParamSet$new(list(ParamInt$new(...))) form.
tune_ntrees <- ps(
  regr.ranger.num.trees = p_int(lower = 50, upper = 600)
)
# `measure`, `terminator` and `tuner` are the objects created for the ridge
# model earlier in this file
at_rf <- AutoTuner$new(
  learner = glrn_rf,
  resampling = rsmp("cv", folds = 3),
  measure = measure,
  search_space = tune_ntrees,
  terminator = terminator,
  tuner = tuner
)
at_rf$train(task_original, row_ids = train_set)
## INFO [04:12:48.867] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]'
## INFO [04:12:48.891] [bbotk] Evaluating 1 configuration(s)
## INFO [04:12:49.015] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:12:49.045] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:13:10.142] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:13:30.389] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:13:52.395] [mlr3] Finished benchmark
## INFO [04:13:52.496] [bbotk] Result of batch 1:
## INFO [04:13:52.503] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:13:52.503] [bbotk] 172 106.9784 0 0 63.184
## INFO [04:13:52.503] [bbotk] uhash
## INFO [04:13:52.503] [bbotk] 8c6048fb-6bd9-46e8-89ce-022feed544ff
## INFO [04:13:52.506] [bbotk] Evaluating 1 configuration(s)
## INFO [04:13:52.605] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:13:52.625] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:14:22.159] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:14:51.381] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:15:23.481] [mlr3] Finished benchmark
## INFO [04:15:23.893] [bbotk] Result of batch 2:
## INFO [04:15:23.916] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:15:23.916] [bbotk] 600 106.7579 0 0 90.4
## INFO [04:15:23.916] [bbotk] uhash
## INFO [04:15:23.916] [bbotk] bfe83a56-1831-488a-9395-6565e76c5478
## INFO [04:15:23.930] [bbotk] Evaluating 1 configuration(s)
## INFO [04:15:24.228] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:15:24.299] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:15:51.014] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:16:17.163] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:16:41.515] [mlr3] Finished benchmark
## INFO [04:16:41.874] [bbotk] Result of batch 3:
## INFO [04:16:41.890] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:16:41.890] [bbotk] 356 107.1674 0 0 76.932
## INFO [04:16:41.890] [bbotk] uhash
## INFO [04:16:41.890] [bbotk] 12586678-1444-4d0a-88b8-74ed5b084863
## INFO [04:16:41.897] [bbotk] Evaluating 1 configuration(s)
## INFO [04:16:42.197] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:16:42.237] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:17:09.079] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:17:34.603] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:18:00.931] [mlr3] Finished benchmark
## INFO [04:18:01.153] [bbotk] Result of batch 4:
## INFO [04:18:01.163] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:18:01.163] [bbotk] 478 107.0987 0 0 78.531
## INFO [04:18:01.163] [bbotk] uhash
## INFO [04:18:01.163] [bbotk] 7fa40647-5990-434d-bac4-d568070b3847
## INFO [04:18:01.177] [bbotk] Evaluating 1 configuration(s)
## INFO [04:18:01.372] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:18:01.407] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:18:22.273] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:18:42.466] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:19:03.414] [mlr3] Finished benchmark
## INFO [04:19:03.620] [bbotk] Result of batch 5:
## INFO [04:19:03.628] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:19:03.628] [bbotk] 233 106.9801 0 0 61.872
## INFO [04:19:03.628] [bbotk] uhash
## INFO [04:19:03.628] [bbotk] 910106cf-c31d-4b96-93e8-4aa55dced883
## INFO [04:19:03.632] [bbotk] Evaluating 1 configuration(s)
## INFO [04:19:03.801] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:19:03.821] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:19:32.984] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:19:58.931] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:20:27.035] [mlr3] Finished benchmark
## INFO [04:20:27.270] [bbotk] Result of batch 6:
## INFO [04:20:27.275] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:20:27.275] [bbotk] 539 107.0493 0 0 83.052
## INFO [04:20:27.275] [bbotk] uhash
## INFO [04:20:27.275] [bbotk] cdd2dab4-7764-4d52-9274-c4025d30d9c2
## INFO [04:20:27.279] [bbotk] Evaluating 1 configuration(s)
## INFO [04:20:27.413] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:20:27.431] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:20:48.651] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:21:10.526] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:21:32.226] [mlr3] Finished benchmark
## INFO [04:21:32.413] [bbotk] Result of batch 7:
## INFO [04:21:32.418] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:21:32.418] [bbotk] 294 107.1791 0 0 64.646
## INFO [04:21:32.418] [bbotk] uhash
## INFO [04:21:32.418] [bbotk] 4221803d-d8ca-42d1-8490-c831d61202e0
## INFO [04:21:32.423] [bbotk] Evaluating 1 configuration(s)
## INFO [04:21:32.552] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:21:32.574] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:21:57.104] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:22:21.523] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:22:45.686] [mlr3] Finished benchmark
## INFO [04:22:45.895] [bbotk] Result of batch 8:
## INFO [04:22:45.903] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:22:45.903] [bbotk] 417 107.1271 0 0 72.987
## INFO [04:22:45.903] [bbotk] uhash
## INFO [04:22:45.903] [bbotk] 57b7eb28-5980-4847-a2a7-44af08aca112
## INFO [04:22:45.907] [bbotk] Evaluating 1 configuration(s)
## INFO [04:22:46.066] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:22:46.094] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:23:03.924] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:23:20.903] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:23:37.687] [mlr3] Finished benchmark
## INFO [04:23:37.863] [bbotk] Result of batch 9:
## INFO [04:23:37.869] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:23:37.869] [bbotk] 111 107.1279 0 0 51.471
## INFO [04:23:37.869] [bbotk] uhash
## INFO [04:23:37.869] [bbotk] b471d40b-1673-41b6-ab52-fbd972c8f5a6
## INFO [04:23:37.874] [bbotk] Evaluating 1 configuration(s)
## INFO [04:23:38.023] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:23:38.040] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:23:54.461] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:24:10.505] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:24:27.269] [mlr3] Finished benchmark
## INFO [04:24:27.452] [bbotk] Result of batch 10:
## INFO [04:24:27.458] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:24:27.458] [bbotk] 50 107.7743 0 0 49.118
## INFO [04:24:27.458] [bbotk] uhash
## INFO [04:24:27.458] [bbotk] b9b7f423-d2ae-42b8-a9af-b66c2adfb2de
## INFO [04:24:27.543] [bbotk] Finished optimizing after 10 evaluation(s)
## INFO [04:24:27.548] [bbotk] Result:
## INFO [04:24:27.553] [bbotk] regr.ranger.num.trees learner_param_vals x_domain regr.mse
## INFO [04:24:27.553] [bbotk] 600 <list[3]> <list[1]> 106.7579
# Predict with the tuned random forest on the held-out test rows only, then
# score those predictions with the default measure (printed as regr.mse).
rf_test_prediction <- at_rf$predict(task_original, row_ids = test_set)
rf_test_prediction$score()
## regr.mse
## 83.88102
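### regr.mse = 35.71414 (presumably noted from an earlier run; the output shown above is 83.88102 -- TODO confirm which value is current)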
# Fix the RNG seed so the random steps of the benchmark (e.g. the assignment
# of rows to cross-validation folds) are reproducible between runs.
set.seed(123)

# Learners to compare. Judging by their names, these are the linear-model and
# ridge pipelines plus the auto-tuned ridge and random-forest learners
# (NOTE(review): confirm against where they are defined earlier in the file).
lrn_list <- list(
  glrn_lm,
  glrn_ridge,
  at_ridge,
  at_rf
)

# Benchmark design: every learner is evaluated on the same task with 3-fold
# cross-validation (4 learners x 3 folds = 12 resampling iterations).
# Argument names are written out in full (`tasks`, not `task`) so the call
# does not depend on R's partial argument matching.
bm_design <- benchmark_grid(
  tasks = task_original,
  learners = lrn_list,
  resamplings = rsmp("cv", folds = 3)
)

# Run all comparisons; store_models = TRUE keeps the fitted models in `bmr`
# so they can be inspected after the benchmark finishes.
bmr <- benchmark(bm_design, store_models = TRUE)
## INFO [04:25:06.391] [mlr3] Running benchmark with 12 resampling iterations
## INFO [04:25:06.411] [mlr3] Applying learner 'imputemean.regr.lm' on task 'Delay' (iter 1/3)
## INFO [04:25:12.588] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:25:27.166] [mlr3] Applying learner 'scale.imputemean.regr.ranger.tuned' on task 'Delay' (iter 2/3)
## INFO [04:25:27.895] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]'
## INFO [04:25:27.916] [bbotk] Evaluating 1 configuration(s)
## INFO [04:25:28.129] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:25:28.165] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:25:45.221] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:26:00.662] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:26:16.352] [mlr3] Finished benchmark
## INFO [04:26:16.553] [bbotk] Result of batch 1:
## INFO [04:26:16.560] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:26:16.560] [bbotk] 50 101.7081 0 0 48.02
## INFO [04:26:16.560] [bbotk] uhash
## INFO [04:26:16.560] [bbotk] db5e87ed-c85d-4606-b16e-6fe6d65878df
## INFO [04:26:16.566] [bbotk] Evaluating 1 configuration(s)
## INFO [04:26:16.786] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:26:16.812] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:26:37.511] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:26:56.552] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:27:17.484] [mlr3] Finished benchmark
## INFO [04:27:17.660] [bbotk] Result of batch 2:
## INFO [04:27:17.673] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:27:17.673] [bbotk] 294 101.2209 0 0 60.577
## INFO [04:27:17.673] [bbotk] uhash
## INFO [04:27:17.673] [bbotk] c96c2a83-726a-465d-a076-49079309b3ad
## INFO [04:27:17.676] [bbotk] Evaluating 1 configuration(s)
## INFO [04:27:17.834] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:27:17.880] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:27:34.514] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:27:51.515] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:28:08.070] [mlr3] Finished benchmark
## INFO [04:28:08.133] [bbotk] Result of batch 3:
## INFO [04:28:08.136] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:28:08.136] [bbotk] 111 101.6412 0 0 50.096
## INFO [04:28:08.136] [bbotk] uhash
## INFO [04:28:08.136] [bbotk] e18ecc07-6b72-4aa6-831b-7d89bcf1f716
## INFO [04:28:08.138] [bbotk] Evaluating 1 configuration(s)
## INFO [04:28:08.189] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:28:08.205] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:28:32.206] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:28:58.013] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:29:21.826] [mlr3] Finished benchmark
## INFO [04:29:22.010] [bbotk] Result of batch 4:
## INFO [04:29:22.016] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:29:22.016] [bbotk] 539 100.9735 0 0 73.525
## INFO [04:29:22.016] [bbotk] uhash
## INFO [04:29:22.016] [bbotk] 05dbbba1-c650-412d-983e-7a4ea212a375
## INFO [04:29:22.022] [bbotk] Evaluating 1 configuration(s)
## INFO [04:29:22.286] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:29:22.344] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:29:44.256] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:30:05.663] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:30:26.298] [mlr3] Finished benchmark
## INFO [04:30:26.362] [bbotk] Result of batch 5:
## INFO [04:30:26.364] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:30:26.364] [bbotk] 356 101.1191 0 0 63.891
## INFO [04:30:26.364] [bbotk] uhash
## INFO [04:30:26.364] [bbotk] 18f4a04a-714d-4a23-9389-8b1955f225b3
## INFO [04:30:26.366] [bbotk] Evaluating 1 configuration(s)
## INFO [04:30:26.409] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:30:26.418] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:30:49.024] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:31:12.113] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:31:42.188] [mlr3] Finished benchmark
## INFO [04:31:42.276] [bbotk] Result of batch 6:
## INFO [04:31:42.279] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:31:42.279] [bbotk] 478 101.0684 0 0 75.697
## INFO [04:31:42.279] [bbotk] uhash
## INFO [04:31:42.279] [bbotk] b5c8e22c-f7b5-454a-ae9f-5d16fb038e80
## INFO [04:31:42.281] [bbotk] Evaluating 1 configuration(s)
## INFO [04:31:42.335] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:31:42.347] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:32:13.135] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:32:41.323] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:33:09.826] [mlr3] Finished benchmark
## INFO [04:33:10.269] [bbotk] Result of batch 7:
## INFO [04:33:10.307] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:33:10.307] [bbotk] 600 100.9992 0 0 87.171
## INFO [04:33:10.307] [bbotk] uhash
## INFO [04:33:10.307] [bbotk] 2737bee5-bf83-4074-9f46-0776f3a817f1
## INFO [04:33:10.315] [bbotk] Evaluating 1 configuration(s)
## INFO [04:33:10.591] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:33:10.647] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:33:31.269] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:33:52.609] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:34:12.103] [mlr3] Finished benchmark
## INFO [04:34:12.515] [bbotk] Result of batch 8:
## INFO [04:34:12.533] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:34:12.533] [bbotk] 233 101.1252 0 0 61.248
## INFO [04:34:12.533] [bbotk] uhash
## INFO [04:34:12.533] [bbotk] fe38a62b-7532-448e-b40f-ffc50393a03e
## INFO [04:34:12.541] [bbotk] Evaluating 1 configuration(s)
## INFO [04:34:12.815] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:34:12.849] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:34:41.311] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:35:07.207] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:35:31.000] [mlr3] Finished benchmark
## INFO [04:35:31.302] [bbotk] Result of batch 9:
## INFO [04:35:31.314] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:35:31.314] [bbotk] 172 101.2901 0 0 77.886
## INFO [04:35:31.314] [bbotk] uhash
## INFO [04:35:31.314] [bbotk] b69669d7-474d-4a46-a2cb-b8729293b445
## INFO [04:35:31.321] [bbotk] Evaluating 1 configuration(s)
## INFO [04:35:31.554] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:35:31.583] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:36:05.160] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:36:36.812] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:37:09.519] [mlr3] Finished benchmark
## INFO [04:37:09.810] [bbotk] Result of batch 10:
## INFO [04:37:09.820] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:37:09.820] [bbotk] 417 101.2634 0 0 97.611
## INFO [04:37:09.820] [bbotk] uhash
## INFO [04:37:09.820] [bbotk] ae570db2-a02a-4913-8419-9f8242ad0c3b
## INFO [04:37:09.875] [bbotk] Finished optimizing after 10 evaluation(s)
## INFO [04:37:09.877] [bbotk] Result:
## INFO [04:37:09.883] [bbotk] regr.ranger.num.trees learner_param_vals x_domain regr.mse
## INFO [04:37:09.883] [bbotk] 539 <list[3]> <list[1]> 100.9735
## INFO [04:38:01.966] [mlr3] Applying learner 'imputemean.regr.lm' on task 'Delay' (iter 3/3)
## INFO [04:38:11.582] [mlr3] Applying learner 'imputemean.regr.lm' on task 'Delay' (iter 2/3)
## INFO [04:38:19.873] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:38:40.024] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:38:57.149] [mlr3] Applying learner 'scale.imputemean.regr.ranger.tuned' on task 'Delay' (iter 3/3)
## INFO [04:38:58.322] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]'
## INFO [04:38:58.345] [bbotk] Evaluating 1 configuration(s)
## INFO [04:38:58.510] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:38:58.552] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:39:38.406] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:40:13.366] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:40:43.885] [mlr3] Finished benchmark
## INFO [04:40:44.240] [bbotk] Result of batch 1:
## INFO [04:40:44.261] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:40:44.261] [bbotk] 600 93.1543 0 0 104.869
## INFO [04:40:44.261] [bbotk] uhash
## INFO [04:40:44.261] [bbotk] 4e6f5d13-7e37-4e68-96cf-da7795394ccd
## INFO [04:40:44.268] [bbotk] Evaluating 1 configuration(s)
## INFO [04:40:44.415] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:40:44.442] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:41:08.574] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:41:43.022] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:42:19.044] [mlr3] Finished benchmark
## INFO [04:42:19.508] [bbotk] Result of batch 2:
## INFO [04:42:19.524] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:42:19.524] [bbotk] 233 93.49332 0 0 93.993
## INFO [04:42:19.524] [bbotk] uhash
## INFO [04:42:19.524] [bbotk] a2bb6f7f-2dfc-4c7c-b5c7-8ee09b415246
## INFO [04:42:19.534] [bbotk] Evaluating 1 configuration(s)
## INFO [04:42:19.815] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:42:19.888] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:42:52.893] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:43:33.623] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:44:19.392] [mlr3] Finished benchmark
## INFO [04:44:20.826] [bbotk] Result of batch 3:
## INFO [04:44:20.853] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:44:20.853] [bbotk] 356 93.22086 0 0 118.789
## INFO [04:44:20.853] [bbotk] uhash
## INFO [04:44:20.853] [bbotk] 359e4b6f-b74a-40e8-b2b8-c77f4b29f3d3
## INFO [04:44:20.866] [bbotk] Evaluating 1 configuration(s)
## INFO [04:44:21.198] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:44:21.264] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:45:14.562] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:47:23.522] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:48:01.467] [mlr3] Finished benchmark
## INFO [04:48:02.519] [bbotk] Result of batch 4:
## INFO [04:48:02.543] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:48:02.543] [bbotk] 294 93.17571 0 0 219.125
## INFO [04:48:02.543] [bbotk] uhash
## INFO [04:48:02.543] [bbotk] 6839a2d9-6d1b-4b73-84f5-a48df088f98e
## INFO [04:48:02.552] [bbotk] Evaluating 1 configuration(s)
## INFO [04:48:02.777] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:48:02.822] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:48:23.131] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:48:42.226] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:48:57.805] [mlr3] Finished benchmark
## INFO [04:48:58.047] [bbotk] Result of batch 5:
## INFO [04:48:58.052] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:48:58.052] [bbotk] 50 93.87929 0 0 54.801
## INFO [04:48:58.052] [bbotk] uhash
## INFO [04:48:58.052] [bbotk] 08c18333-fa85-4ebd-ba49-d4c01291bad2
## INFO [04:48:58.055] [bbotk] Evaluating 1 configuration(s)
## INFO [04:48:58.123] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:48:58.137] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:49:24.509] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:49:55.676] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:50:38.863] [mlr3] Finished benchmark
## INFO [04:50:39.202] [bbotk] Result of batch 6:
## INFO [04:50:39.224] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:50:39.224] [bbotk] 417 93.39374 0 0 100.426
## INFO [04:50:39.224] [bbotk] uhash
## INFO [04:50:39.224] [bbotk] b8e3cff2-12b4-4c47-a429-729c391c17ed
## INFO [04:50:39.234] [bbotk] Evaluating 1 configuration(s)
## INFO [04:50:39.566] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:50:39.604] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:51:03.838] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:51:27.571] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:51:48.030] [mlr3] Finished benchmark
## INFO [04:51:48.276] [bbotk] Result of batch 7:
## INFO [04:51:48.293] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:51:48.293] [bbotk] 172 93.09661 0 0 68.128
## INFO [04:51:48.293] [bbotk] uhash
## INFO [04:51:48.293] [bbotk] 4ea6fe48-87d4-4523-9d6f-4b15c9bed0f6
## INFO [04:51:48.301] [bbotk] Evaluating 1 configuration(s)
## INFO [04:51:48.512] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:51:48.545] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:52:15.536] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:52:32.574] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:52:54.544] [mlr3] Finished benchmark
## INFO [04:52:54.893] [bbotk] Result of batch 8:
## INFO [04:52:54.914] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:52:54.914] [bbotk] 111 93.51544 0 0 65.826
## INFO [04:52:54.914] [bbotk] uhash
## INFO [04:52:54.914] [bbotk] 8a4d09be-00b2-4aa3-9918-1600289f26a5
## INFO [04:52:54.921] [bbotk] Evaluating 1 configuration(s)
## INFO [04:52:55.154] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:52:55.214] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:53:24.946] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:54:01.465] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:54:35.387] [mlr3] Finished benchmark
## INFO [04:54:35.669] [bbotk] Result of batch 9:
## INFO [04:54:35.679] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:54:35.679] [bbotk] 478 93.33387 0 0 99.846
## INFO [04:54:35.679] [bbotk] uhash
## INFO [04:54:35.679] [bbotk] 14c3442b-9319-4cc9-b19b-62f4dbd527ab
## INFO [04:54:35.683] [bbotk] Evaluating 1 configuration(s)
## INFO [04:54:35.785] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:54:35.799] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [04:55:10.709] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [04:55:43.360] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [04:56:18.532] [mlr3] Finished benchmark
## INFO [04:56:19.111] [bbotk] Result of batch 10:
## INFO [04:56:19.125] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [04:56:19.125] [bbotk] 539 93.05796 0 0 102.395
## INFO [04:56:19.125] [bbotk] uhash
## INFO [04:56:19.125] [bbotk] 2cf23859-575c-4bc2-a87f-0328524f3870
## INFO [04:56:19.209] [bbotk] Finished optimizing after 10 evaluation(s)
## INFO [04:56:19.211] [bbotk] Result:
## INFO [04:56:19.218] [bbotk] regr.ranger.num.trees learner_param_vals x_domain regr.mse
## INFO [04:56:19.218] [bbotk] 539 <list[3]> <list[1]> 93.05796
## INFO [04:57:02.019] [mlr3] Applying learner 'scale.imputemean.regr.glmnet.tuned' on task 'Delay' (iter 2/3)
## INFO [04:57:02.742] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]'
## INFO [04:57:02.762] [bbotk] Evaluating 1 configuration(s)
## INFO [04:57:02.872] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:57:02.898] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:57:19.966] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:57:37.031] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:58:00.419] [mlr3] Finished benchmark
## INFO [04:58:00.719] [bbotk] Result of batch 1:
## INFO [04:58:00.732] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:58:00.732] [bbotk] 1.562222 98.23961 0 0 57.197
## INFO [04:58:00.732] [bbotk] uhash
## INFO [04:58:00.732] [bbotk] 18f408e8-7349-4d76-ae8f-0f0ca7838c17
## INFO [04:58:00.740] [bbotk] Evaluating 1 configuration(s)
## INFO [04:58:01.103] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:58:01.227] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:58:16.544] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:58:32.435] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:58:46.364] [mlr3] Finished benchmark
## INFO [04:58:46.595] [bbotk] Result of batch 2:
## INFO [04:58:46.613] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:58:46.613] [bbotk] 0.2488889 98.2254 0 0 44.969
## INFO [04:58:46.613] [bbotk] uhash
## INFO [04:58:46.613] [bbotk] a335b023-87bd-497c-afb4-ae606d0f639a
## INFO [04:58:46.624] [bbotk] Evaluating 1 configuration(s)
## INFO [04:58:49.867] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:58:49.970] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [04:59:20.121] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [04:59:40.484] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [04:59:58.924] [mlr3] Finished benchmark
## INFO [04:59:59.315] [bbotk] Result of batch 3:
## INFO [04:59:59.331] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [04:59:59.331] [bbotk] 0.4677778 98.22634 0 0 68.614
## INFO [04:59:59.331] [bbotk] uhash
## INFO [04:59:59.331] [bbotk] 66bf0b6f-0601-4759-9148-2a5e90c53372
## INFO [04:59:59.340] [bbotk] Evaluating 1 configuration(s)
## INFO [04:59:59.610] [mlr3] Running benchmark with 3 resampling iterations
## INFO [04:59:59.658] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:00:17.608] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:00:31.310] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:00:51.720] [mlr3] Finished benchmark
## INFO [05:00:52.205] [bbotk] Result of batch 4:
## INFO [05:00:52.222] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:00:52.222] [bbotk] 1.781111 98.24353 0 0 51.874
## INFO [05:00:52.222] [bbotk] uhash
## INFO [05:00:52.222] [bbotk] c7580234-4fbd-4d5d-9ea7-e758b0dc8097
## INFO [05:00:52.231] [bbotk] Evaluating 1 configuration(s)
## INFO [05:00:52.506] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:00:52.545] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:01:09.423] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:01:34.678] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:01:54.785] [mlr3] Finished benchmark
## INFO [05:01:55.258] [bbotk] Result of batch 5:
## INFO [05:01:55.272] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:01:55.272] [bbotk] 1.343333 98.23604 0 0 61.912
## INFO [05:01:55.272] [bbotk] uhash
## INFO [05:01:55.272] [bbotk] a6759be1-e5b2-42b9-a604-4a4b22310c75
## INFO [05:01:55.292] [bbotk] Evaluating 1 configuration(s)
## INFO [05:01:55.641] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:01:55.694] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:03:45.585] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:04:16.709] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:04:43.324] [mlr3] Finished benchmark
## INFO [05:04:44.135] [bbotk] Result of batch 6:
## INFO [05:04:44.200] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:04:44.200] [bbotk] 2 98.24775 0 0 166.827
## INFO [05:04:44.200] [bbotk] uhash
## INFO [05:04:44.200] [bbotk] e5f1f380-8468-4812-ae6f-d4c833bb2bfa
## INFO [05:04:44.260] [bbotk] Evaluating 1 configuration(s)
## INFO [05:04:44.835] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:04:44.961] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:07:27.498] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:07:47.881] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:08:07.277] [mlr3] Finished benchmark
## INFO [05:08:07.657] [bbotk] Result of batch 7:
## INFO [05:08:07.673] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:08:07.673] [bbotk] 0.03 98.22522 0 0 201.62
## INFO [05:08:07.673] [bbotk] uhash
## INFO [05:08:07.673] [bbotk] f9756dd0-28da-4b29-9a5d-fe63298e1b67
## INFO [05:08:07.683] [bbotk] Evaluating 1 configuration(s)
## INFO [05:08:07.950] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:08:08.010] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:08:24.808] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:08:42.025] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:09:00.491] [mlr3] Finished benchmark
## INFO [05:09:00.879] [bbotk] Result of batch 8:
## INFO [05:09:00.894] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:09:00.894] [bbotk] 0.6866667 98.22795 0 0 52.195
## INFO [05:09:00.894] [bbotk] uhash
## INFO [05:09:00.894] [bbotk] 5713da6a-207f-48c0-ac21-3a4ef9fcfb08
## INFO [05:09:00.899] [bbotk] Evaluating 1 configuration(s)
## INFO [05:09:01.164] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:09:01.211] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:09:19.069] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:09:34.881] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:09:48.686] [mlr3] Finished benchmark
## INFO [05:09:48.863] [bbotk] Result of batch 9:
## INFO [05:09:48.869] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:09:48.869] [bbotk] 0.9055556 98.23015 0 0 47.219
## INFO [05:09:48.869] [bbotk] uhash
## INFO [05:09:48.869] [bbotk] 532a7faf-85e4-4052-be21-f3b07c9fa3df
## INFO [05:09:48.873] [bbotk] Evaluating 1 configuration(s)
## INFO [05:09:48.998] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:09:49.022] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:10:03.697] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:10:20.223] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:10:34.032] [mlr3] Finished benchmark
## INFO [05:10:34.476] [bbotk] Result of batch 10:
## INFO [05:10:34.486] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:10:34.486] [bbotk] 1.124444 98.23286 0 0 44.832
## INFO [05:10:34.486] [bbotk] uhash
## INFO [05:10:34.486] [bbotk] a90984fa-76d1-4b58-ad66-a6f22cf67bc6
## INFO [05:10:34.584] [bbotk] Finished optimizing after 10 evaluation(s)
## INFO [05:10:34.586] [bbotk] Result:
## INFO [05:10:34.597] [bbotk] regr.glmnet.lambda learner_param_vals x_domain regr.mse
## INFO [05:10:34.597] [bbotk] 0.03 <list[3]> <list[1]> 98.22522
## INFO [05:10:48.780] [mlr3] Applying learner 'scale.imputemean.regr.glmnet.tuned' on task 'Delay' (iter 3/3)
## INFO [05:10:49.318] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]'
## INFO [05:10:49.331] [bbotk] Evaluating 1 configuration(s)
## INFO [05:10:49.451] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:10:49.470] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:11:04.142] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:11:22.060] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:11:36.086] [mlr3] Finished benchmark
## INFO [05:11:36.337] [bbotk] Result of batch 1:
## INFO [05:11:36.348] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:11:36.348] [bbotk] 0.4677778 89.65833 0 0 46.346
## INFO [05:11:36.348] [bbotk] uhash
## INFO [05:11:36.348] [bbotk] 8e15be5a-17b3-4821-af9a-88ddfe4aa967
## INFO [05:11:36.352] [bbotk] Evaluating 1 configuration(s)
## INFO [05:11:36.484] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:11:36.509] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:11:51.327] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:12:08.430] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:12:22.224] [mlr3] Finished benchmark
## INFO [05:12:22.429] [bbotk] Result of batch 2:
## INFO [05:12:22.438] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:12:22.438] [bbotk] 1.562222 89.67329 0 0 45.494
## INFO [05:12:22.438] [bbotk] uhash
## INFO [05:12:22.438] [bbotk] 43b8b5b5-d3e2-4439-b98e-ee15be115694
## INFO [05:12:22.443] [bbotk] Evaluating 1 configuration(s)
## INFO [05:12:22.560] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:12:22.571] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:12:38.284] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:12:54.545] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:13:09.406] [mlr3] Finished benchmark
## INFO [05:13:09.630] [bbotk] Result of batch 3:
## INFO [05:13:09.639] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:13:09.639] [bbotk] 0.6866667 89.66019 0 0 46.695
## INFO [05:13:09.639] [bbotk] uhash
## INFO [05:13:09.639] [bbotk] 14e4a2ed-3685-42a3-8e9e-bedabe9ef8b0
## INFO [05:13:09.645] [bbotk] Evaluating 1 configuration(s)
## INFO [05:13:09.765] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:13:09.782] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:13:28.831] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:13:45.223] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:13:58.363] [mlr3] Finished benchmark
## INFO [05:13:58.521] [bbotk] Result of batch 4:
## INFO [05:13:58.528] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:13:58.528] [bbotk] 1.343333 89.66931 0 0 48.406
## INFO [05:13:58.528] [bbotk] uhash
## INFO [05:13:58.528] [bbotk] 74a9d11b-cee1-4361-b93f-564b784edb44
## INFO [05:13:58.534] [bbotk] Evaluating 1 configuration(s)
## INFO [05:13:58.632] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:13:58.644] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:14:12.913] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:14:29.827] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:14:43.602] [mlr3] Finished benchmark
## INFO [05:14:43.778] [bbotk] Result of batch 5:
## INFO [05:14:43.783] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:14:43.783] [bbotk] 0.9055556 89.66269 0 0 44.811
## INFO [05:14:43.783] [bbotk] uhash
## INFO [05:14:43.783] [bbotk] f8ac3ce4-d689-4fbb-bc2a-d306fb2886de
## INFO [05:14:43.786] [bbotk] Evaluating 1 configuration(s)
## INFO [05:14:43.921] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:14:43.937] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:15:02.657] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:15:16.527] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:15:31.662] [mlr3] Finished benchmark
## INFO [05:15:31.807] [bbotk] Result of batch 6:
## INFO [05:15:31.813] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:15:31.813] [bbotk] 1.124444 89.66575 0 0 47.525
## INFO [05:15:31.813] [bbotk] uhash
## INFO [05:15:31.813] [bbotk] ffb56033-50ea-4330-8106-bc4c8e42b71c
## INFO [05:15:31.817] [bbotk] Evaluating 1 configuration(s)
## INFO [05:15:31.900] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:15:31.919] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:15:48.216] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:16:02.117] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:16:19.574] [mlr3] Finished benchmark
## INFO [05:16:19.799] [bbotk] Result of batch 7:
## INFO [05:16:19.808] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:16:19.808] [bbotk] 0.03 89.65695 0 0 47.483
## INFO [05:16:19.808] [bbotk] uhash
## INFO [05:16:19.808] [bbotk] ff4b7aa4-e975-4be0-80bf-f86e979a8a60
## INFO [05:16:19.818] [bbotk] Evaluating 1 configuration(s)
## INFO [05:16:20.114] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:16:20.167] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:16:34.736] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:16:49.734] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:17:04.785] [mlr3] Finished benchmark
## INFO [05:17:05.078] [bbotk] Result of batch 8:
## INFO [05:17:05.089] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:17:05.089] [bbotk] 0.2488889 89.65722 0 0 44.415
## INFO [05:17:05.089] [bbotk] uhash
## INFO [05:17:05.089] [bbotk] 48629a4f-4884-46f5-a623-499004f8c2d8
## INFO [05:17:05.095] [bbotk] Evaluating 1 configuration(s)
## INFO [05:17:05.244] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:17:05.256] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:17:21.816] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:17:36.777] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:17:52.340] [mlr3] Finished benchmark
## INFO [05:17:52.585] [bbotk] Result of batch 9:
## INFO [05:17:52.594] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:17:52.594] [bbotk] 2 89.68232 0 0 46.855
## INFO [05:17:52.594] [bbotk] uhash
## INFO [05:17:52.594] [bbotk] 61111ec1-bdb5-4793-9456-7c2abdb145cc
## INFO [05:17:52.601] [bbotk] Evaluating 1 configuration(s)
## INFO [05:17:52.938] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:17:53.032] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:18:06.766] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:18:20.638] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:18:42.146] [mlr3] Finished benchmark
## INFO [05:18:42.432] [bbotk] Result of batch 10:
## INFO [05:18:42.443] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:18:42.443] [bbotk] 1.781111 89.67764 0 0 48.953
## INFO [05:18:42.443] [bbotk] uhash
## INFO [05:18:42.443] [bbotk] 0519da49-8068-4310-9343-9205dfc597c3
## INFO [05:18:42.506] [bbotk] Finished optimizing after 10 evaluation(s)
## INFO [05:18:42.509] [bbotk] Result:
## INFO [05:18:42.515] [bbotk] regr.glmnet.lambda learner_param_vals x_domain regr.mse
## INFO [05:18:42.515] [bbotk] 0.03 <list[3]> <list[1]> 89.65695
## INFO [05:19:04.191] [mlr3] Applying learner 'scale.imputemean.regr.glmnet.tuned' on task 'Delay' (iter 1/3)
## INFO [05:19:05.212] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]'
## INFO [05:19:05.219] [bbotk] Evaluating 1 configuration(s)
## INFO [05:19:05.294] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:19:05.320] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:19:31.978] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:20:27.209] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:20:54.844] [mlr3] Finished benchmark
## INFO [05:20:55.354] [bbotk] Result of batch 1:
## INFO [05:20:55.378] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:20:55.378] [bbotk] 0.03 95.76616 0 0 109.196
## INFO [05:20:55.378] [bbotk] uhash
## INFO [05:20:55.378] [bbotk] 1ca3b836-a1f1-4cce-b9fe-cc51dcfbdf13
## INFO [05:20:55.385] [bbotk] Evaluating 1 configuration(s)
## INFO [05:20:55.628] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:20:55.647] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:21:10.493] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:21:27.367] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:21:47.672] [mlr3] Finished benchmark
## INFO [05:21:48.560] [bbotk] Result of batch 2:
## INFO [05:21:48.590] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:21:48.590] [bbotk] 2 95.78784 0 0 51.551
## INFO [05:21:48.590] [bbotk] uhash
## INFO [05:21:48.590] [bbotk] 8b40c5e0-8161-4a63-a9f9-b18887b2648c
## INFO [05:21:48.627] [bbotk] Evaluating 1 configuration(s)
## INFO [05:21:49.543] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:21:49.672] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:22:10.626] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:22:30.834] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:22:49.806] [mlr3] Finished benchmark
## INFO [05:22:50.037] [bbotk] Result of batch 3:
## INFO [05:22:50.048] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:22:50.048] [bbotk] 1.343333 95.77653 0 0 59.589
## INFO [05:22:50.048] [bbotk] uhash
## INFO [05:22:50.048] [bbotk] 558cb2f2-b7bb-422e-8ac1-427e3e49bb18
## INFO [05:22:50.054] [bbotk] Evaluating 1 configuration(s)
## INFO [05:22:50.407] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:22:50.452] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:23:05.587] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:23:18.690] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:23:34.830] [mlr3] Finished benchmark
## INFO [05:23:35.250] [bbotk] Result of batch 4:
## INFO [05:23:35.267] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:23:35.267] [bbotk] 0.6866667 95.76875 0 0 44.195
## INFO [05:23:35.267] [bbotk] uhash
## INFO [05:23:35.267] [bbotk] 44c44149-cddc-4825-b7d7-9d4527fc233a
## INFO [05:23:35.280] [bbotk] Evaluating 1 configuration(s)
## INFO [05:23:35.906] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:23:36.145] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:23:54.899] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:24:10.779] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:24:29.823] [mlr3] Finished benchmark
## INFO [05:24:30.204] [bbotk] Result of batch 5:
## INFO [05:24:30.223] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:24:30.223] [bbotk] 0.4677778 95.7672 0 0 53.317
## INFO [05:24:30.223] [bbotk] uhash
## INFO [05:24:30.223] [bbotk] 2e53ea80-2c3d-4bc9-a847-3273f0a8d963
## INFO [05:24:30.241] [bbotk] Evaluating 1 configuration(s)
## INFO [05:24:30.682] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:24:30.773] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:24:51.202] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:25:07.990] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:25:23.056] [mlr3] Finished benchmark
## INFO [05:25:23.192] [bbotk] Result of batch 6:
## INFO [05:25:23.196] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:25:23.196] [bbotk] 1.124444 95.77348 0 0 51.945
## INFO [05:25:23.196] [bbotk] uhash
## INFO [05:25:23.196] [bbotk] 96d932a6-0d7d-42bb-9d4f-2a5442f4952c
## INFO [05:25:23.198] [bbotk] Evaluating 1 configuration(s)
## INFO [05:25:23.302] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:25:23.321] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:25:38.360] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:25:52.365] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:26:07.488] [mlr3] Finished benchmark
## INFO [05:26:07.625] [bbotk] Result of batch 7:
## INFO [05:26:07.630] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:26:07.630] [bbotk] 1.781111 95.78376 0 0 43.994
## INFO [05:26:07.630] [bbotk] uhash
## INFO [05:26:07.630] [bbotk] 45f0510d-f64c-497e-8239-09f680c09dd5
## INFO [05:26:07.633] [bbotk] Evaluating 1 configuration(s)
## INFO [05:26:07.719] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:26:07.740] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:26:44.946] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:26:59.837] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:27:17.904] [mlr3] Finished benchmark
## INFO [05:27:18.269] [bbotk] Result of batch 8:
## INFO [05:27:18.284] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:27:18.284] [bbotk] 0.9055556 95.77086 0 0 69.979
## INFO [05:27:18.284] [bbotk] uhash
## INFO [05:27:18.284] [bbotk] b3f0d7fd-a55b-4f2d-9cfb-01e3443067ef
## INFO [05:27:18.291] [bbotk] Evaluating 1 configuration(s)
## INFO [05:27:18.573] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:27:18.611] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:27:34.863] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:27:50.554] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:28:10.164] [mlr3] Finished benchmark
## INFO [05:28:10.575] [bbotk] Result of batch 9:
## INFO [05:28:10.587] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:28:10.587] [bbotk] 0.2488889 95.76631 0 0 51.38
## INFO [05:28:10.587] [bbotk] uhash
## INFO [05:28:10.587] [bbotk] eb30c4ca-6bee-4978-b5d1-cc32112d496d
## INFO [05:28:10.593] [bbotk] Evaluating 1 configuration(s)
## INFO [05:28:10.910] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:28:10.950] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 2/3)
## INFO [05:28:26.604] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 3/3)
## INFO [05:28:42.884] [mlr3] Applying learner 'scale.imputemean.regr.glmnet' on task 'Delay' (iter 1/3)
## INFO [05:28:57.596] [mlr3] Finished benchmark
## INFO [05:28:57.810] [bbotk] Result of batch 10:
## INFO [05:28:57.817] [bbotk] regr.glmnet.lambda regr.mse warnings errors runtime_learners
## INFO [05:28:57.817] [bbotk] 1.562222 95.77998 0 0 46.518
## INFO [05:28:57.817] [bbotk] uhash
## INFO [05:28:57.817] [bbotk] a5491f00-f8a4-484e-8967-e9d3186d5d8e
## INFO [05:28:57.886] [bbotk] Finished optimizing after 10 evaluation(s)
## INFO [05:28:57.888] [bbotk] Result:
## INFO [05:28:57.895] [bbotk] regr.glmnet.lambda learner_param_vals x_domain regr.mse
## INFO [05:28:57.895] [bbotk] 0.03 <list[3]> <list[1]> 95.76616
## INFO [05:29:14.478] [mlr3] Applying learner 'scale.imputemean.regr.ranger.tuned' on task 'Delay' (iter 1/3)
## INFO [05:29:15.310] [bbotk] Starting to optimize 1 parameter(s) with '<TunerGridSearch>' and '<TerminatorEvals> [n_evals=20, k=0]'
## INFO [05:29:15.332] [bbotk] Evaluating 1 configuration(s)
## INFO [05:29:15.492] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:29:15.537] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [05:29:32.794] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [05:29:46.953] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [05:30:03.771] [mlr3] Finished benchmark
## INFO [05:30:04.023] [bbotk] Result of batch 1:
## INFO [05:30:04.037] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [05:30:04.037] [bbotk] 50 101.6354 0 0 48.047
## INFO [05:30:04.037] [bbotk] uhash
## INFO [05:30:04.037] [bbotk] 2e7f84af-5545-4841-b21e-f1eba4afd333
## INFO [05:30:04.048] [bbotk] Evaluating 1 configuration(s)
## INFO [05:30:04.259] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:30:04.289] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [05:30:23.210] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [05:30:38.516] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [05:30:55.942] [mlr3] Finished benchmark
## INFO [05:30:56.208] [bbotk] Result of batch 2:
## INFO [05:30:56.216] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [05:30:56.216] [bbotk] 111 100.8052 0 0 51.547
## INFO [05:30:56.216] [bbotk] uhash
## INFO [05:30:56.216] [bbotk] 72c26eaa-3fc7-4953-b6d0-68b36c71c4b0
## INFO [05:30:56.223] [bbotk] Evaluating 1 configuration(s)
## INFO [05:30:56.388] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:30:56.411] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [05:31:17.292] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [05:31:37.714] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [05:31:59.639] [mlr3] Finished benchmark
## INFO [05:31:59.879] [bbotk] Result of batch 3:
## INFO [05:31:59.890] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [05:31:59.890] [bbotk] 294 100.784 0 0 63.093
## INFO [05:31:59.890] [bbotk] uhash
## INFO [05:31:59.890] [bbotk] 69f34ed5-f41f-448c-b50e-3bc0c5fefb81
## INFO [05:31:59.901] [bbotk] Evaluating 1 configuration(s)
## INFO [05:32:00.103] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:32:00.137] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [05:32:25.782] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [05:32:50.133] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [05:33:15.679] [mlr3] Finished benchmark
## INFO [05:33:15.781] [bbotk] Result of batch 4:
## INFO [05:33:15.785] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [05:33:15.785] [bbotk] 539 100.8647 0 0 75.435
## INFO [05:33:15.785] [bbotk] uhash
## INFO [05:33:15.785] [bbotk] d9ee3dca-ea30-486a-ba47-862ad1ed8738
## INFO [05:33:15.788] [bbotk] Evaluating 1 configuration(s)
## INFO [05:33:15.910] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:33:15.927] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [05:33:37.756] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [05:34:00.712] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [05:34:22.605] [mlr3] Finished benchmark
## INFO [05:34:22.694] [bbotk] Result of batch 5:
## INFO [05:34:22.699] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [05:34:22.699] [bbotk] 417 100.505 0 0 66.551
## INFO [05:34:22.699] [bbotk] uhash
## INFO [05:34:22.699] [bbotk] f8d08cb0-8bd3-4442-844e-9826835e1433
## INFO [05:34:22.702] [bbotk] Evaluating 1 configuration(s)
## INFO [05:34:22.821] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:34:22.838] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [05:34:44.174] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [05:35:04.689] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [05:35:26.417] [mlr3] Finished benchmark
## INFO [05:35:26.636] [bbotk] Result of batch 6:
## INFO [05:35:26.643] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [05:35:26.643] [bbotk] 356 100.5993 0 0 63.5
## INFO [05:35:26.643] [bbotk] uhash
## INFO [05:35:26.643] [bbotk] 2c5ff1a9-a22b-430c-9b16-bceed6c0cffc
## INFO [05:35:26.648] [bbotk] Evaluating 1 configuration(s)
## INFO [05:35:26.817] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:35:26.846] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [05:35:50.178] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [05:36:13.301] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [05:36:37.225] [mlr3] Finished benchmark
## INFO [05:36:37.341] [bbotk] Result of batch 7:
## INFO [05:36:37.348] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [05:36:37.348] [bbotk] 478 100.8171 0 0 70.293
## INFO [05:36:37.348] [bbotk] uhash
## INFO [05:36:37.348] [bbotk] 31b8b2e3-9fa5-4173-b44d-5b7c41ef46ea
## INFO [05:36:37.353] [bbotk] Evaluating 1 configuration(s)
## INFO [05:36:37.481] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:36:37.503] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [05:36:54.885] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [05:37:10.624] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [05:37:29.015] [mlr3] Finished benchmark
## INFO [05:37:29.190] [bbotk] Result of batch 8:
## INFO [05:37:29.197] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [05:37:29.197] [bbotk] 172 100.5946 0 0 51.418
## INFO [05:37:29.197] [bbotk] uhash
## INFO [05:37:29.197] [bbotk] 103cdefa-9d5c-41bf-8f15-7118b32db48c
## INFO [05:37:29.202] [bbotk] Evaluating 1 configuration(s)
## INFO [05:37:29.364] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:37:29.385] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [05:37:46.916] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [05:38:05.774] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [05:38:24.240] [mlr3] Finished benchmark
## INFO [05:38:24.305] [bbotk] Result of batch 9:
## INFO [05:38:24.307] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [05:38:24.307] [bbotk] 233 100.9425 0 0 54.744
## INFO [05:38:24.307] [bbotk] uhash
## INFO [05:38:24.307] [bbotk] 3eb3b18c-5c06-4d9c-8f1b-3ea2555324f6
## INFO [05:38:24.309] [bbotk] Evaluating 1 configuration(s)
## INFO [05:38:24.354] [mlr3] Running benchmark with 3 resampling iterations
## INFO [05:38:24.366] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 2/3)
## INFO [05:38:50.036] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 1/3)
## INFO [05:39:16.375] [mlr3] Applying learner 'scale.imputemean.regr.ranger' on task 'Delay' (iter 3/3)
## INFO [05:39:44.380] [mlr3] Finished benchmark
## INFO [05:39:44.458] [bbotk] Result of batch 10:
## INFO [05:39:44.462] [bbotk] regr.ranger.num.trees regr.mse warnings errors runtime_learners
## INFO [05:39:44.462] [bbotk] 600 100.9782 0 0 79.936
## INFO [05:39:44.462] [bbotk] uhash
## INFO [05:39:44.462] [bbotk] 7129e28c-6760-4d24-80a2-11c73f84b2fb
## INFO [05:39:44.476] [bbotk] Finished optimizing after 10 evaluation(s)
## INFO [05:39:44.477] [bbotk] Result:
## INFO [05:39:44.480] [bbotk] regr.ranger.num.trees learner_param_vals x_domain regr.mse
## INFO [05:39:44.480] [bbotk] 417 <list[3]> <list[1]> 100.505
## INFO [05:40:14.507] [mlr3] Finished benchmark
# Plot the benchmark result for all learners. The black-and-white theme is
# applied first, then the x-axis labels are rotated 45 degrees and
# right-aligned so the long learner ids stay readable.
autoplot(bmr) +
  ggplot2::theme_bw() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)
  )

# Aggregate the resampling scores of each learner using `measure`.
bmr$aggregate(measures = measure)
## nr resample_result task_id learner_id
## 1: 1 <ResampleResult[22]> Delay imputemean.regr.lm
## 2: 2 <ResampleResult[22]> Delay scale.imputemean.regr.glmnet
## 3: 3 <ResampleResult[22]> Delay scale.imputemean.regr.glmnet.tuned
## 4: 4 <ResampleResult[22]> Delay scale.imputemean.regr.ranger.tuned
## resampling_id iters regr.mse
## 1: cv 3 94.54240
## 2: cv 3 94.54240
## 3: cv 3 94.54238
## 4: cv 3 99.45913
#nr resample_result task_id
#1: 1 <ResampleResult[22]> Delay
#2: 2 <ResampleResult[22]> Delay
#3: 3 <ResampleResult[22]> Delay
#4: 4 <ResampleResult[22]> Delay
#learner_id resampling_id iters
#1: imputemean.regr.lm cv 3
#2: scale.imputemean.regr.glmnet cv 3
#3: scale.imputemean.regr.glmnet.tuned cv 3
#4: scale.imputemean.regr.ranger.tuned cv 3
#regr.mse
#1: 68.32703
#2: 68.32712
#3: 68.32669
#4: 31.95522
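In the results recorded in the comments above, Random Forests performed best among the regression models, with an MSE of 31.955 (compared with roughly 68.33 for the linear and glmnet models). Note that the rendered output of `bmr$aggregate(measure)` shown above disagrees with this ranking: there the tuned Random Forest has the highest MSE (99.459), while the linear and glmnet models score about 94.542, so the conclusion should be checked against a single, reproducible run.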